xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision bef158480efac06de457f7a665168877ab3c2fd7)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11 
12       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
13               matrix that represents the operator. For sequential matrices it returns itself.
14 
15     Input Parameter:
16 .      A - the Seq or MPI dense matrix
17 
18     Output Parameter:
19 .      B - the inner matrix
20 
21     Level: intermediate
22 
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25 {
26   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
27   PetscErrorCode ierr;
28   PetscBool      flg;
29 
30   PetscFunctionBegin;
31   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
32   PetscValidPointer(B,2);
33   ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
34   if (flg) *B = mat->A;
35   else {
36     ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg);CHKERRQ(ierr);
37     if (!flg) SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
38     *B = A;
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
44 {
45   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
46   PetscErrorCode ierr;
47   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
48 
49   PetscFunctionBegin;
50   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
51   lrow = row - rstart;
52   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
53   PetscFunctionReturn(0);
54 }
55 
56 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
57 {
58   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
59   PetscErrorCode ierr;
60   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
61 
62   PetscFunctionBegin;
63   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
64   lrow = row - rstart;
65   ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
66   PetscFunctionReturn(0);
67 }
68 
/* Return (as a borrowed reference) the square diagonal block owned by this process;
   the block aliases A's storage, is created once, and is cached as a composed object on A */
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr);
    if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    /* share storage with A starting at the first diagonal column;
       NOTE(review): offset m*rstart assumes the local leading dimension equals m — confirm */
    ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* composing keeps a reference, so the local handle can be destroyed */
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
102 
/* Insert/add values: locally owned rows go straight into the local dense block;
   off-process rows are queued in the stash and communicated at assembly time */
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are silently ignored */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v stored by rows: row i occupies v[i*n .. i*n+n-1] */
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* v stored by columns: entry (i,j) is v[i+j*m] */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue; /* negative column indices are silently ignored */
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      /* off-process row: stash for MatAssemblyBegin/End to communicate */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
136 
137 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
138 {
139   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
140   PetscErrorCode ierr;
141   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
142 
143   PetscFunctionBegin;
144   for (i=0; i<m; i++) {
145     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
146     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
147     if (idxm[i] >= rstart && idxm[i] < rend) {
148       row = idxm[i] - rstart;
149       for (j=0; j<n; j++) {
150         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
151         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
152         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
153       }
154     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
155   }
156   PetscFunctionReturn(0);
157 }
158 
159 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
160 {
161   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
162   PetscErrorCode ierr;
163 
164   PetscFunctionBegin;
165   ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
166   PetscFunctionReturn(0);
167 }
168 
/* Set the leading dimension of the local dense block, lazily creating that block
   (this process's rows against all global columns) if it does not yet exist */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->A) {
    if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
    ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->A);CHKERRQ(ierr);
    /* local block stores all N global columns of the locally owned rows */
    ierr = MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
    /* mirror the CUDA/host flavor of the parallel matrix */
    ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  }
  ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
189 
190 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
191 {
192   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
193   PetscErrorCode ierr;
194 
195   PetscFunctionBegin;
196   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
197   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
198   PetscFunctionReturn(0);
199 }
200 
201 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
202 {
203   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
204   PetscErrorCode ierr;
205 
206   PetscFunctionBegin;
207   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
208   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
209   PetscFunctionReturn(0);
210 }
211 
212 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
213 {
214   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
215   PetscErrorCode ierr;
216 
217   PetscFunctionBegin;
218   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
219   ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr);
220   PetscFunctionReturn(0);
221 }
222 
223 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
224 {
225   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
226   PetscErrorCode ierr;
227 
228   PetscFunctionBegin;
229   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
230   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
231   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
232   PetscFunctionReturn(0);
233 }
234 
235 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
236 {
237   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
238   PetscErrorCode ierr;
239 
240   PetscFunctionBegin;
241   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
242   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
243   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
244   PetscFunctionReturn(0);
245 }
246 
247 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
248 {
249   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
250   PetscErrorCode ierr;
251 
252   PetscFunctionBegin;
253   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
254   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
255   ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr);
256   PetscFunctionReturn(0);
257 }
258 
/* Extract a parallel submatrix selected by global row/column index sets; the result keeps
   the row distribution of isrow on each process (no redistribution is performed) */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
  PetscErrorCode    ierr;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  /* every process needs the full column selection, since it stores all columns of its rows */
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  /* NOTE(review): the sequential writes through bv assume the target's leading dimension
     equals nrows; true for a freshly preallocated matrix, unverified for MAT_REUSE_MATRIX */
  newmatd = (Mat_MPIDense*)newmat->data;
  ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr);
  for (i=0; i<Ncols; i++) {
    const PetscScalar *av = v + lda*icol[i]; /* start of source column icol[i] (column-major) */
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart]; /* selected rows must be locally owned */
    }
  }
  ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr);

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
328 
329 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
330 {
331   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
332   PetscErrorCode ierr;
333 
334   PetscFunctionBegin;
335   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
336   PetscFunctionReturn(0);
337 }
338 
339 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
340 {
341   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
342   PetscErrorCode ierr;
343 
344   PetscFunctionBegin;
345   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
346   PetscFunctionReturn(0);
347 }
348 
349 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
350 {
351   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
352   PetscErrorCode ierr;
353 
354   PetscFunctionBegin;
355   ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr);
356   PetscFunctionReturn(0);
357 }
358 
359 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
360 {
361   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
362   PetscErrorCode ierr;
363   PetscInt       nstash,reallocs;
364 
365   PetscFunctionBegin;
366   if (mdn->donotstash || mat->nooffprocentries) return(0);
367 
368   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
369   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
370   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
371   PetscFunctionReturn(0);
372 }
373 
/* Finish assembly: drain stashed off-process entries into the local block, assemble the
   local block, and on the first final assembly build the MatMult communication structures */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break; /* no more incoming stash messages */

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* first final assembly: create the scatter used by MatMult and friends */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
412 
413 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
414 {
415   PetscErrorCode ierr;
416   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
417 
418   PetscFunctionBegin;
419   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
420   PetscFunctionReturn(0);
421 }
422 
/* Zero the listed global rows (each process handles only the rows it owns), optionally
   placing diag on the diagonal and adjusting b so that b = diag*x on those rows */
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr);
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr);
  }
  /* zero the rows in the local block; the diagonal is handled separately below */
  ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (diag != 0.0) {
    Vec d;

    /* NOTE(review): this writes diag into EVERY diagonal entry of A, not only the zeroed
       rows — verify against the MatZeroRows contract before relying on untouched rows */
    ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
    ierr = VecSet(d,diag);CHKERRQ(ierr);
    ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecDestroy(&d);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
455 
456 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
457 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
458 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
459 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
460 
461 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
462 {
463   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
464   PetscErrorCode    ierr;
465   const PetscScalar *ax;
466   PetscScalar       *ay;
467 
468   PetscFunctionBegin;
469   ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
470   ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
471   ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
472   ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
473   ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
474   ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
475   ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
480 {
481   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
482   PetscErrorCode    ierr;
483   const PetscScalar *ax;
484   PetscScalar       *ay;
485 
486   PetscFunctionBegin;
487   ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
488   ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
489   ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
490   ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
491   ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
492   ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
493   ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
494   PetscFunctionReturn(0);
495 }
496 
497 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
498 {
499   Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
500   PetscErrorCode    ierr;
501   const PetscScalar *ax;
502   PetscScalar       *ay;
503 
504   PetscFunctionBegin;
505   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
506   ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
507   ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
508   ierr = VecGetArrayInPlace(yy,&ay);CHKERRQ(ierr);
509   ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
510   ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
511   ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
512   ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr);
513   PetscFunctionReturn(0);
514 }
515 
516 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
517 {
518   Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
519   PetscErrorCode    ierr;
520   const PetscScalar *ax;
521   PetscScalar       *ay;
522 
523   PetscFunctionBegin;
524   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
525   ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
526   ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
527   ierr = VecGetArrayInPlace(zz,&ay);CHKERRQ(ierr);
528   ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
529   ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
530   ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
531   ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr);
532   PetscFunctionReturn(0);
533 }
534 
535 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
536 {
537   Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
538   PetscErrorCode    ierr;
539   PetscInt          lda,len,i,n,m = A->rmap->n,radd;
540   PetscScalar       *x,zero = 0.0;
541   const PetscScalar *av;
542 
543   PetscFunctionBegin;
544   ierr = VecSet(v,zero);CHKERRQ(ierr);
545   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
546   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
547   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
548   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
549   radd = A->rmap->rstart*m;
550   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
551   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
552   for (i=0; i<len; i++) {
553     x[i] = av[radd + i*lda + i];
554   }
555   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
556   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
557   PetscFunctionReturn(0);
558 }
559 
/* Destroy the parallel dense matrix: free the stash, local block, communication objects,
   cached column vector/submatrix, and unregister every composed method */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  /* outstanding column-vector or submatrix views indicate a caller error */
  if (mdn->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (mdn->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);

  /* clear every method registered by the MPIDENSE type so the object can be retyped */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
#if defined (PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
617 
618 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
619 
620 #include <petscdraw.h>
/* View on ASCII, draw, or socket viewers; for entry output the whole matrix is first
   gathered onto rank 0 and viewed there as a sequential dense matrix */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); /* vtype is fetched but never read below */
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* info formats print per-rank storage statistics instead of entries */
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      /* rank 0 now owns all rows: view its local block under the parallel matrix's name */
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
703 
704 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
705 {
706   PetscErrorCode ierr;
707   PetscBool      iascii,isbinary,isdraw,issocket;
708 
709   PetscFunctionBegin;
710   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
711   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
712   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
713   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
714 
715   if (iascii || issocket || isdraw) {
716     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
717   } else if (isbinary) {
718     ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
719   }
720   PetscFunctionReturn(0);
721 }
722 
723 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
724 {
725   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
726   Mat            mdn  = mat->A;
727   PetscErrorCode ierr;
728   PetscLogDouble isend[5],irecv[5];
729 
730   PetscFunctionBegin;
731   info->block_size = 1.0;
732 
733   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
734 
735   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
736   isend[3] = info->memory;  isend[4] = info->mallocs;
737   if (flag == MAT_LOCAL) {
738     info->nz_used      = isend[0];
739     info->nz_allocated = isend[1];
740     info->nz_unneeded  = isend[2];
741     info->memory       = isend[3];
742     info->mallocs      = isend[4];
743   } else if (flag == MAT_GLOBAL_MAX) {
744     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
745 
746     info->nz_used      = irecv[0];
747     info->nz_allocated = irecv[1];
748     info->nz_unneeded  = irecv[2];
749     info->memory       = irecv[3];
750     info->mallocs      = irecv[4];
751   } else if (flag == MAT_GLOBAL_SUM) {
752     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
753 
754     info->nz_used      = irecv[0];
755     info->nz_allocated = irecv[1];
756     info->nz_unneeded  = irecv[2];
757     info->memory       = irecv[3];
758     info->mallocs      = irecv[4];
759   }
760   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
761   info->fill_ratio_needed = 0;
762   info->factor_mallocs    = 0;
763   PetscFunctionReturn(0);
764 }
765 
766 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
767 {
768   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
769   PetscErrorCode ierr;
770 
771   PetscFunctionBegin;
772   switch (op) {
773   case MAT_NEW_NONZERO_LOCATIONS:
774   case MAT_NEW_NONZERO_LOCATION_ERR:
775   case MAT_NEW_NONZERO_ALLOCATION_ERR:
776     MatCheckPreallocated(A,1);
777     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
778     break;
779   case MAT_ROW_ORIENTED:
780     MatCheckPreallocated(A,1);
781     a->roworiented = flg;
782     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
783     break;
784   case MAT_NEW_DIAGONALS:
785   case MAT_KEEP_NONZERO_PATTERN:
786   case MAT_USE_HASH_TABLE:
787   case MAT_SORTED_FULL:
788     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
789     break;
790   case MAT_IGNORE_OFF_PROC_ENTRIES:
791     a->donotstash = flg;
792     break;
793   case MAT_SYMMETRIC:
794   case MAT_STRUCTURALLY_SYMMETRIC:
795   case MAT_HERMITIAN:
796   case MAT_SYMMETRY_ETERNAL:
797   case MAT_IGNORE_LOWER_TRIANGULAR:
798   case MAT_IGNORE_ZERO_ENTRIES:
799     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
800     break;
801   default:
802     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
803   }
804   PetscFunctionReturn(0);
805 }
806 
807 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
808 {
809   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
810   const PetscScalar *l;
811   PetscScalar       x,*v,*vv,*r;
812   PetscErrorCode    ierr;
813   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;
814 
815   PetscFunctionBegin;
816   ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
817   ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
818   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
819   if (ll) {
820     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
821     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2);
822     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
823     for (i=0; i<m; i++) {
824       x = l[i];
825       v = vv + i;
826       for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
827     }
828     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
829     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
830   }
831   if (rr) {
832     const PetscScalar *ar;
833 
834     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
835     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
836     ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
837     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
838     ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
839     ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
840     ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr);
841     for (i=0; i<n; i++) {
842       x = r[i];
843       v = vv + i*lda;
844       for (j=0; j<m; j++) (*v++) *= x;
845     }
846     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
847     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
848   }
849   ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr);
850   PetscFunctionReturn(0);
851 }
852 
/*
   MatNorm_MPIDense - Computes the Frobenius, 1-, or infinity-norm of an
   MPI dense matrix.  On one process the work is delegated to the local
   sequential block; otherwise local contributions are combined with an
   allreduce over the matrix's communicator.  The 2-norm is unsupported.

   NOTE(review): the Frobenius and 1-norm loops walk the local array
   contiguously, which assumes the leading dimension equals the local row
   count (no padding) -- confirm against MatDenseGetLDA if lda can differ.
*/
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,j;
  PetscMPIInt       size;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr);
  v    = av;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* local sum of |a_ij|^2, then global sum and square root */
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      /* per-column absolute sums, summed globally; the norm is their max */
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = av;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
901 
902 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
903 {
904   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
905   Mat            B;
906   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
907   PetscErrorCode ierr;
908   PetscInt       j,i,lda;
909   PetscScalar    *v;
910 
911   PetscFunctionBegin;
912   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
913     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
914     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
915     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
916     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
917   } else B = *matout;
918 
919   m    = a->A->rmap->n; n = a->A->cmap->n;
920   ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
921   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
922   ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
923   for (i=0; i<m; i++) rwork[i] = rstart + i;
924   for (j=0; j<n; j++) {
925     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
926     v   += lda;
927   }
928   ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
929   ierr = PetscFree(rwork);CHKERRQ(ierr);
930   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
931   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
932   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
933     *matout = B;
934   } else {
935     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
936   }
937   PetscFunctionReturn(0);
938 }
939 
940 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
941 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
942 
943 PetscErrorCode MatSetUp_MPIDense(Mat A)
944 {
945   PetscErrorCode ierr;
946 
947   PetscFunctionBegin;
948   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
949   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
950   if (!A->preallocated) {
951     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
952   }
953   PetscFunctionReturn(0);
954 }
955 
956 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
957 {
958   PetscErrorCode ierr;
959   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
960 
961   PetscFunctionBegin;
962   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
963   PetscFunctionReturn(0);
964 }
965 
966 PetscErrorCode MatConjugate_MPIDense(Mat mat)
967 {
968   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
969   PetscErrorCode ierr;
970 
971   PetscFunctionBegin;
972   ierr = MatConjugate(a->A);CHKERRQ(ierr);
973   PetscFunctionReturn(0);
974 }
975 
976 PetscErrorCode MatRealPart_MPIDense(Mat A)
977 {
978   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
979   PetscErrorCode ierr;
980 
981   PetscFunctionBegin;
982   ierr = MatRealPart(a->A);CHKERRQ(ierr);
983   PetscFunctionReturn(0);
984 }
985 
986 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
987 {
988   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
989   PetscErrorCode ierr;
990 
991   PetscFunctionBegin;
992   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
993   PetscFunctionReturn(0);
994 }
995 
996 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
997 {
998   PetscErrorCode ierr;
999   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1000 
1001   PetscFunctionBegin;
1002   if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
1003   if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
1004   ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
1005   PetscFunctionReturn(0);
1006 }
1007 
1008 PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1009 
1010 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1011 {
1012   PetscErrorCode ierr;
1013   PetscInt       i,n;
1014   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1015   PetscReal      *work;
1016 
1017   PetscFunctionBegin;
1018   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1019   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1020   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1021   if (type == NORM_2) {
1022     for (i=0; i<n; i++) work[i] *= work[i];
1023   }
1024   if (type == NORM_INFINITY) {
1025     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1026   } else {
1027     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1028   }
1029   ierr = PetscFree(work);CHKERRQ(ierr);
1030   if (type == NORM_2) {
1031     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1032   }
1033   PetscFunctionReturn(0);
1034 }
1035 
1036 #if defined(PETSC_HAVE_CUDA)
/*
   Returns column col of the matrix as a CUDA vector with read/write
   access.  The shared column vector a->cvec is lazily created, then its
   device storage is pointed into the matrix's device array at the start
   of the requested column.  Only one column may be outstanding at a time
   (tracked by a->vecinuse); the caller must pair this with
   MatDenseRestoreColumnVec().
*/
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the reusable column vector (no storage of its own yet) */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* +1 so column 0 still reads as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1057 
1058 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1059 {
1060   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1061   PetscErrorCode ierr;
1062 
1063   PetscFunctionBegin;
1064   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1065   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1066   a->vecinuse = 0;
1067   ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1068   ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
1069   *v   = NULL;
1070   PetscFunctionReturn(0);
1071 }
1072 
/*
   Read-only variant of MatDenseGetColumnVec for the CUDA type: obtains
   the device array with read access, points the shared column vector at
   column col, and push-locks the vector against writes.  Must be paired
   with MatDenseRestoreColumnVecRead().
*/
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the reusable column vector (no storage of its own yet) */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* +1 so column 0 still reads as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1094 
/*
   Releases a column vector obtained with MatDenseGetColumnVecRead():
   restores the read-only device array, pops the read lock, and resets
   the shared column vector's placed storage.
*/
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1110 
/*
   Write-only variant of MatDenseGetColumnVec for the CUDA type: obtains
   the device array with write access and points the shared column vector
   at column col.  Must be paired with MatDenseRestoreColumnVecWrite().
*/
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the reusable column vector (no storage of its own yet) */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* +1 so column 0 still reads as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1131 
/*
   Releases a column vector obtained with MatDenseGetColumnVecWrite():
   restores the write-only device array and resets the shared column
   vector's placed storage.
*/
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1146 
/*
   Temporarily replaces the local block's device storage with the
   user-provided device array a; disallowed while a column vector or
   sub-matrix is checked out.
*/
static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1158 
/*
   Undoes a previous MatDenseCUDAPlaceArray() on the local block;
   disallowed while a column vector or sub-matrix is checked out.
*/
static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1170 
/*
   Permanently replaces the local block's device storage with the
   user-provided device array a; disallowed while a column vector or
   sub-matrix is checked out.
*/
static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1182 
/* Write access to the device array: forwards to the local sequential CUDA block. */
static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1192 
/* Releases write access to the device array: forwards to the local sequential CUDA block. */
static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1202 
/* Read-only access to the device array: forwards to the local sequential CUDA block. */
static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1212 
/* Releases read-only access to the device array: forwards to the local sequential CUDA block. */
static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1222 
/* Read/write access to the device array: forwards to the local sequential CUDA block. */
static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1232 
/* Releases read/write access to the device array: forwards to the local sequential CUDA block. */
static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1242 
1243 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1244 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1245 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
1246 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1247 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1248 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
1249 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);
1250 
/*
   Binds (or unbinds) the CUDA dense matrix to the CPU.  The local block
   is bound first, then the column-vec accessor methods are recomposed so
   that they create/use CPU or CUDA column vectors as appropriate.  Cached
   helper objects (d->cvec, d->cmat) of the wrong flavor are destroyed on
   unbind so they get recreated lazily with the CUDA types.
*/
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (d->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (d->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* drop cached helpers that are not already CUDA-flavored */
    ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
    }
    ierr = PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = MatDestroy(&d->cmat);CHKERRQ(ierr);
    }
    /* route the column-vec accessors through the CUDA implementations */
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
  } else {
    /* route the column-vec accessors through the host implementations */
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  }
  if (d->cmat) {
    ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1293 
/*
   MatMPIDenseCUDASetPreallocation - Allocates (or adopts) the device
   storage for an MPIDENSECUDA matrix.

   Input Parameters:
.  A      - the matrix; silently ignored unless its type is MATMPIDENSECUDA
.  d_data - optional device array to use as storage; NULL lets PETSc allocate

   The local sequential block d->A is lazily created, typed as
   MATSEQDENSECUDA, and preallocated with d_data.
*/
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  if (!iscuda) PetscFunctionReturn(0);
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!d->A) {
    /* lazily create the local (sequential) block */
    ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
    ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
  }
  ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
  ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1316 #endif
1317 
1318 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1319 {
1320   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1321   PetscErrorCode ierr;
1322 
1323   PetscFunctionBegin;
1324   ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
1325   PetscFunctionReturn(0);
1326 }
1327 
/*
   A dense matrix stores every entry, so no diagonal entry can be missing;
   *missing is always PETSC_FALSE and d is intentionally left untouched.
*/
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1334 
1335 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1336 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1337 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1338 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1339 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
1340 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);
1341 
1342 /* -------------------------------------------------------------------*/
1343 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1344                                         MatGetRow_MPIDense,
1345                                         MatRestoreRow_MPIDense,
1346                                         MatMult_MPIDense,
1347                                 /*  4*/ MatMultAdd_MPIDense,
1348                                         MatMultTranspose_MPIDense,
1349                                         MatMultTransposeAdd_MPIDense,
1350                                         NULL,
1351                                         NULL,
1352                                         NULL,
1353                                 /* 10*/ NULL,
1354                                         NULL,
1355                                         NULL,
1356                                         NULL,
1357                                         MatTranspose_MPIDense,
1358                                 /* 15*/ MatGetInfo_MPIDense,
1359                                         MatEqual_MPIDense,
1360                                         MatGetDiagonal_MPIDense,
1361                                         MatDiagonalScale_MPIDense,
1362                                         MatNorm_MPIDense,
1363                                 /* 20*/ MatAssemblyBegin_MPIDense,
1364                                         MatAssemblyEnd_MPIDense,
1365                                         MatSetOption_MPIDense,
1366                                         MatZeroEntries_MPIDense,
1367                                 /* 24*/ MatZeroRows_MPIDense,
1368                                         NULL,
1369                                         NULL,
1370                                         NULL,
1371                                         NULL,
1372                                 /* 29*/ MatSetUp_MPIDense,
1373                                         NULL,
1374                                         NULL,
1375                                         MatGetDiagonalBlock_MPIDense,
1376                                         NULL,
1377                                 /* 34*/ MatDuplicate_MPIDense,
1378                                         NULL,
1379                                         NULL,
1380                                         NULL,
1381                                         NULL,
1382                                 /* 39*/ MatAXPY_MPIDense,
1383                                         MatCreateSubMatrices_MPIDense,
1384                                         NULL,
1385                                         MatGetValues_MPIDense,
1386                                         NULL,
1387                                 /* 44*/ NULL,
1388                                         MatScale_MPIDense,
1389                                         MatShift_Basic,
1390                                         NULL,
1391                                         NULL,
1392                                 /* 49*/ MatSetRandom_MPIDense,
1393                                         NULL,
1394                                         NULL,
1395                                         NULL,
1396                                         NULL,
1397                                 /* 54*/ NULL,
1398                                         NULL,
1399                                         NULL,
1400                                         NULL,
1401                                         NULL,
1402                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1403                                         MatDestroy_MPIDense,
1404                                         MatView_MPIDense,
1405                                         NULL,
1406                                         NULL,
1407                                 /* 64*/ NULL,
1408                                         NULL,
1409                                         NULL,
1410                                         NULL,
1411                                         NULL,
1412                                 /* 69*/ NULL,
1413                                         NULL,
1414                                         NULL,
1415                                         NULL,
1416                                         NULL,
1417                                 /* 74*/ NULL,
1418                                         NULL,
1419                                         NULL,
1420                                         NULL,
1421                                         NULL,
1422                                 /* 79*/ NULL,
1423                                         NULL,
1424                                         NULL,
1425                                         NULL,
1426                                 /* 83*/ MatLoad_MPIDense,
1427                                         NULL,
1428                                         NULL,
1429                                         NULL,
1430                                         NULL,
1431                                         NULL,
1432                                 /* 89*/ NULL,
1433                                         NULL,
1434                                         NULL,
1435                                         NULL,
1436                                         NULL,
1437                                 /* 94*/ NULL,
1438                                         NULL,
1439                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1440                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1441                                         NULL,
1442                                 /* 99*/ MatProductSetFromOptions_MPIDense,
1443                                         NULL,
1444                                         NULL,
1445                                         MatConjugate_MPIDense,
1446                                         NULL,
1447                                 /*104*/ NULL,
1448                                         MatRealPart_MPIDense,
1449                                         MatImaginaryPart_MPIDense,
1450                                         NULL,
1451                                         NULL,
1452                                 /*109*/ NULL,
1453                                         NULL,
1454                                         NULL,
1455                                         MatGetColumnVector_MPIDense,
1456                                         MatMissingDiagonal_MPIDense,
1457                                 /*114*/ NULL,
1458                                         NULL,
1459                                         NULL,
1460                                         NULL,
1461                                         NULL,
1462                                 /*119*/ NULL,
1463                                         NULL,
1464                                         NULL,
1465                                         NULL,
1466                                         NULL,
1467                                 /*124*/ NULL,
1468                                         MatGetColumnNorms_MPIDense,
1469                                         NULL,
1470                                         NULL,
1471                                         NULL,
1472                                 /*129*/ NULL,
1473                                         NULL,
1474                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1475                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1476                                         NULL,
1477                                 /*134*/ NULL,
1478                                         NULL,
1479                                         NULL,
1480                                         NULL,
1481                                         NULL,
1482                                 /*139*/ NULL,
1483                                         NULL,
1484                                         NULL,
1485                                         NULL,
1486                                         NULL,
1487                                         MatCreateMPIMatConcatenateSeqMat_MPIDense,
1488                                 /*145*/ NULL,
1489                                         NULL,
1490                                         NULL
1491 };
1492 
/*
  MatMPIDenseSetPreallocation_MPIDense - Implementation of MatMPIDenseSetPreallocation() for MATMPIDENSE.

  Sets up the row/column layouts, lazily creates the inner sequential dense
  matrix a->A holding the locally owned rows (all N columns), and preallocates
  it with the optional user-supplied array `data` (NULL lets PETSc allocate).
*/
PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* cannot (re)preallocate while a submatrix from MatDenseGetSubMatrix() is outstanding */
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  if (!a->A) {
    /* inner matrix is rmap->n by cmap->N: local rows, all global columns */
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
    ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  }
  /* inner type mirrors the outer type: SEQDENSECUDA for MATMPIDENSECUDA, SEQDENSE otherwise */
  ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  mat->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1514 
1515 #if defined(PETSC_HAVE_ELEMENTAL)
/*
  MatConvert_MPIDense_Elemental - Convert a MATMPIDENSE matrix to MATELEMENTAL.

  Creates (or reuses, for MAT_REUSE_MATRIX) an Elemental matrix of the same
  global size and inserts every locally owned row with a single MatSetValues()
  call using the column-oriented local dense array.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    /* the dense local array passed below is column oriented */
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  /* indices of the locally owned rows, and all global column indices */
  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    /* replace A's internals with the Elemental representation */
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
1554 #endif
1555 
1556 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1557 {
1558   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1559   PetscErrorCode ierr;
1560 
1561   PetscFunctionBegin;
1562   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1563   PetscFunctionReturn(0);
1564 }
1565 
1566 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1567 {
1568   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1569   PetscErrorCode ierr;
1570 
1571   PetscFunctionBegin;
1572   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1573   PetscFunctionReturn(0);
1574 }
1575 
1576 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1577 {
1578   PetscErrorCode ierr;
1579   Mat_MPIDense   *mat;
1580   PetscInt       m,nloc,N;
1581 
1582   PetscFunctionBegin;
1583   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
1584   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
1585   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1586     PetscInt sum;
1587 
1588     if (n == PETSC_DECIDE) {
1589       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1590     }
1591     /* Check sum(n) = N */
1592     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1593     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1594 
1595     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
1596   }
1597 
1598   /* numeric phase */
1599   mat = (Mat_MPIDense*)(*outmat)->data;
1600   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1601   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1602   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1603   PetscFunctionReturn(0);
1604 }
1605 
1606 #if defined(PETSC_HAVE_CUDA)
/*
  MatConvert_MPIDenseCUDA_MPIDense - Convert a MATMPIDENSECUDA matrix back to plain MATMPIDENSE.

  Binds the matrix to the CPU, restores the default (host) vector type, removes
  the CUDA-specific composed methods, and converts the inner sequential matrix
  to MATSEQDENSE.
*/
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  B    = *newmat;
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
  /* clear the CUDA-only composed methods installed by MatConvert_MPIDense_MPIDenseCUDA() */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* move the local block back to host and rebuild the mat-vec scatter */
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}
1648 
/*
  MatConvert_MPIDense_MPIDenseCUDA - Convert a MATMPIDENSE matrix to MATMPIDENSECUDA.

  Switches the default vector type to CUDA, installs the CUDA-specific composed
  methods, and converts the inner sequential matrix to MATSEQDENSECUDA.
*/
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  B    = *newmat;
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  /* install CUDA-only methods; these are cleared again by MatConvert_MPIDenseCUDA_MPIDense() */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* move the local block to the GPU and rebuild the mat-vec scatter */
    ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
1693 #endif
1694 
/*
  MatDenseGetColumnVec_MPIDense - Expose column `col` of the matrix as an MPI Vec.

  The internal vector a->cvec is created lazily (with a NULL array) and its
  array is placed at the start of column `col` of the inner sequential matrix.
  Only one column vector may be outstanding at a time; callers must pair this
  with MatDenseRestoreColumnVec().
*/
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  /* store col+1 so that 0 means "no column vector in use" */
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  /* column col starts lda entries apart in the column-major local array */
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1714 
1715 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1716 {
1717   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1718   PetscErrorCode ierr;
1719 
1720   PetscFunctionBegin;
1721   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1722   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1723   a->vecinuse = 0;
1724   ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1725   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1726   *v   = NULL;
1727   PetscFunctionReturn(0);
1728 }
1729 
/*
  MatDenseGetColumnVecRead_MPIDense - Read-only variant of MatDenseGetColumnVec_MPIDense().

  Same as the read/write version but obtains the array with
  MatDenseGetArrayRead() and read-locks the internal vector so callers cannot
  modify it. Must be paired with MatDenseRestoreColumnVecRead().
*/
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  /* store col+1 so that 0 means "no column vector in use" */
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  /* prevent write access through the returned vector */
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1750 
1751 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1752 {
1753   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1754   PetscErrorCode ierr;
1755 
1756   PetscFunctionBegin;
1757   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1758   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1759   a->vecinuse = 0;
1760   ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1761   ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
1762   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1763   *v   = NULL;
1764   PetscFunctionReturn(0);
1765 }
1766 
/*
  MatDenseGetColumnVecWrite_MPIDense - Write-only variant of MatDenseGetColumnVec_MPIDense().

  Same as the read/write version but obtains the array with
  MatDenseGetArrayWrite(), so the previous contents of the column need not be
  valid. Must be paired with MatDenseRestoreColumnVecWrite().
*/
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  /* store col+1 so that 0 means "no column vector in use" */
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1786 
1787 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1788 {
1789   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1790   PetscErrorCode ierr;
1791 
1792   PetscFunctionBegin;
1793   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1794   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1795   a->vecinuse = 0;
1796   ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1797   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1798   *v   = NULL;
1799   PetscFunctionReturn(0);
1800 }
1801 
/*
  MatDenseGetSubMatrix_MPIDense - Expose columns [cbegin,cend) of the matrix as
  a (cached) MATMPIDENSE matrix sharing the same storage.

  The cached matrix a->cmat is created on first use and its column layout is
  rebuilt whenever the requested column count changes. Only one submatrix may
  be outstanding at a time; callers must pair this with
  MatDenseRestoreSubMatrix().
*/
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscBool      setup = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (a->vecinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cmat) {
    /* first call: create the cached submatrix, sharing A's row layout */
    setup = PETSC_TRUE;
    ierr = MatCreate(comm,&a->cmat);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr);
    ierr = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  } else if (cend-cbegin != a->cmat->cmap->N) {
    /* column count changed since the last call: rebuild the column layout */
    setup = PETSC_TRUE;
    ierr = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  }
  c = (Mat_MPIDense*)a->cmat->data;
  if (c->A) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  /* the inner matrix of the cached submatrix aliases columns [cbegin,cend) of a->A */
  ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr);
  if (setup) { /* do we really need this? */
    ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr);
  }
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled = PETSC_TRUE;
  /* store cbegin+1 so that 0 means "no submatrix in use" */
  a->matinuse = cbegin + 1;
  *v = a->cmat;
  PetscFunctionReturn(0);
}
1841 
/*
  MatDenseRestoreSubMatrix_MPIDense - Return the submatrix obtained with
  MatDenseGetSubMatrix_MPIDense() and release the aliased inner storage.
*/
PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
  if (!a->cmat) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
  if (*v != a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c    = (Mat_MPIDense*)a->cmat->data;
  /* restore the inner alias; this also destroys c->A */
  ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1858 
/*
  MatCreate_MPIDense - Type constructor for MATMPIDENSE.

  Allocates the Mat_MPIDense implementation struct, installs the function
  table, creates the value stash for off-process MatSetValues(), and composes
  all the MatDense* methods and conversion routines for this type.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* compose the MatDense API implementations for this type */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr);
  /* conversions to external dense formats, available only when configured */
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#endif

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1922 
1923 /*MC
1924    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1925 
1926    Options Database Keys:
1927 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1928 
1929   Level: beginner
1930 
.seealso: MATDENSE, MATDENSECUDA, MATMPIDENSE, MATSEQDENSECUDA
1932 
1933 M*/
1934 #if defined(PETSC_HAVE_CUDA)
/*
  MatCreate_MPIDenseCUDA - Type constructor for MATMPIDENSECUDA.

  Checks that CUDA is initialized, builds a MATMPIDENSE matrix, then converts
  it in place to the CUDA type.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscCUDAInitializeCheck();CHKERRQ(ierr);
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1945 #endif
1946 
1947 /*MC
1948    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1949 
1950    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1951    and MATMPIDENSE otherwise.
1952 
1953    Options Database Keys:
1954 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1955 
1956   Level: beginner
1957 
1958 
1959 .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
1960 M*/
1961 
1962 /*MC
1963    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
1964 
1965    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
1966    and MATMPIDENSECUDA otherwise.
1967 
1968    Options Database Keys:
1969 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
1970 
1971   Level: beginner
1972 
1973 .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
1974 M*/
1975 
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  /* dispatches to MatMPIDenseSetPreallocation_MPIDense() if B is MATMPIDENSE; no-op for other types */
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2007 
/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the placed array lives on the host, so the valid copy is now the CPU one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
2041 
/*@
   MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameter:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode  MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  /* values (may) have changed back to the original array: bump the object state */
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2068 
/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), VecReplaceArray()
@*/
PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the replacement array lives on the host, so the valid copy is now the CPU one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
2101 
2102 #if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation */
  ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  /* the visible matrix values have (potentially) changed */
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* the placed array lives in device memory, so the GPU copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2133 
/*@C
   MatDenseCUDAResetArray - Resets the matrix array to what it previously had before the call to MatDenseCUDAPlaceArray()

   Not Collective

   Input Parameter:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDenseCUDAPlaceArray()

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()

@*/
PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation */
  ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  /* restoring the original array changes the visible matrix values */
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2160 
/*@C
   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   This permanently replaces the GPU array and frees the memory associated with the old GPU array.
   The memory passed in CANNOT be freed by the user. It will be freed
   when the matrix is destroyed. The array should respect the matrix leading dimension.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation */
  ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  /* the visible matrix values have (potentially) changed */
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* the replacement array lives in device memory, so the GPU copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2192 
/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation */
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  /* write access: the caller may change the values, so bump the object state */
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2221 
/*@C
   MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:
   The data on the GPU is marked as most recent (the offload mask is set to PETSC_OFFLOAD_GPU).

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation */
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  /* the caller may have written through the array */
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  /* the GPU copy is now the valid one */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2248 
/*@C
   MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation; read-only access does not
     change the matrix values, so the object state is NOT increased here */
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2276 
2277 /*@C
2278    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2279 
2280    Not Collective
2281 
2282    Input Parameters:
2283 +  A - the matrix
2284 -  array - the GPU array in column major order
2285 
2286    Notes:
2287    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2288 
2289    Level: developer
2290 
2291 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
2292 @*/
2293 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2294 {
2295   PetscErrorCode ierr;
2296 
2297   PetscFunctionBegin;
2298   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2299   PetscFunctionReturn(0);
2300 }
2301 
/*@C
   MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().

   Level: developer

.seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation */
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  /* read-write access: the caller may change the values, so bump the object state */
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2330 
/*@C
   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:
   The data on the GPU is marked as most recent (the offload mask is set to PETSC_OFFLOAD_GPU).

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation */
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  /* the caller may have written through the array */
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  /* the GPU copy is now the valid one */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2357 #endif
2358 
2359 /*@C
2360    MatCreateDense - Creates a matrix in dense format.
2361 
2362    Collective
2363 
2364    Input Parameters:
2365 +  comm - MPI communicator
2366 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2367 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2368 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2369 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2370 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2371    to control all matrix memory allocation.
2372 
2373    Output Parameter:
2374 .  A - the matrix
2375 
2376    Notes:
2377    The dense format is fully compatible with standard Fortran 77
2378    storage by columns.
2379 
2380    The data input variable is intended primarily for Fortran programmers
2381    who wish to allocate their own matrix memory space.  Most users should
2382    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
2383 
2384    The user MUST specify either the local or global matrix dimensions
2385    (possibly both).
2386 
2387    Level: intermediate
2388 
2389 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
2390 @*/
2391 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2392 {
2393   PetscErrorCode ierr;
2394   PetscMPIInt    size;
2395 
2396   PetscFunctionBegin;
2397   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2398   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2399   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2400   if (size > 1) {
2401     PetscBool havedata = (PetscBool)!!data;
2402 
2403     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
2404     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
2405     ierr = MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr);
2406     if (havedata) {  /* user provided data array, so no need to assemble */
2407       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
2408       (*A)->assembled = PETSC_TRUE;
2409     }
2410   } else {
2411     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
2412     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
2413   }
2414   PetscFunctionReturn(0);
2415 }
2416 
2417 #if defined(PETSC_HAVE_CUDA)
/*@C
   MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of GPU matrix data.  Set data=NULL for PETSc
   to control matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   Unlike MatCreateDense(), the data argument must be passed (or not passed) consistently
   on all ranks of the communicator.

   Level: intermediate

.seealso: MatCreate(), MatCreateDense()
@*/
PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  /* require that either every rank or no rank supplies data (debug-mode check) */
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
    ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
    ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2464 #endif
2465 
/* MatDuplicate_MPIDense - duplicates a parallel dense matrix: creates a new matrix with
   the same type and layout, copies the relevant flags, references the row/column layouts,
   and duplicates the local sequential block (copying values or not per cpvalues). */
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = NULL;
  ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIDense*)mat->data;

  /* the duplicate is immediately usable: mark it assembled and preallocated */
  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* share (reference-count) the layouts instead of copying them */
  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  /* duplicate the local dense block; cpvalues controls whether values are copied */
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}
2496 
/* MatLoad_MPIDense - loads a parallel dense matrix from a binary or (when PETSc was
   configured with HDF5) an HDF5 viewer; any other viewer type is an error. */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}
2523 
2524 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
2525 {
2526   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2527   Mat            a,b;
2528   PetscBool      flg;
2529   PetscErrorCode ierr;
2530 
2531   PetscFunctionBegin;
2532   a    = matA->A;
2533   b    = matB->A;
2534   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2535   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2536   PetscFunctionReturn(0);
2537 }
2538 
2539 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2540 {
2541   PetscErrorCode        ierr;
2542   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2543 
2544   PetscFunctionBegin;
2545   ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
2546   ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
2547   ierr = PetscFree(atb);CHKERRQ(ierr);
2548   PetscFunctionReturn(0);
2549 }
2550 
2551 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2552 {
2553   PetscErrorCode        ierr;
2554   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2555 
2556   PetscFunctionBegin;
2557   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
2558   ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
2559   ierr = PetscFree(abt);CHKERRQ(ierr);
2560   PetscFunctionReturn(0);
2561 }
2562 
/* MatTransposeMatMultNumeric_MPIDense_MPIDense - numeric phase of C = A^T*B for two
   MPI dense matrices.  Each rank multiplies its local blocks, the partial products are
   packed per destination rank, and MPI_Reduce_scatter sums and distributes the rows of C. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts;
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  atb = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf = atb->sendbuf;

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);

  /* arrange atbarray into sendbuf: for each destination rank, pack its row block
     (all cN columns) contiguously so reduce_scatter can deliver it in one piece */
  ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);

  /* sum all atbarray to local values of C */
  ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2607 
2608 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2609 {
2610   PetscErrorCode        ierr;
2611   MPI_Comm              comm;
2612   PetscMPIInt           size;
2613   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
2614   Mat_TransMatMultDense *atb;
2615   PetscBool             cisdense;
2616   PetscInt              i;
2617   const PetscInt        *ranges;
2618 
2619   PetscFunctionBegin;
2620   MatCheckProduct(C,3);
2621   if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2622   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2623   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2624     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2625   }
2626 
2627   /* create matrix product C */
2628   ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr);
2629   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
2630   if (!cisdense) {
2631     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
2632   }
2633   ierr = MatSetUp(C);CHKERRQ(ierr);
2634 
2635   /* create data structure for reuse C */
2636   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2637   ierr = PetscNew(&atb);CHKERRQ(ierr);
2638   cM   = C->rmap->N;
2639   ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);
2640   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
2641   for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
2642 
2643   C->product->data    = atb;
2644   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2645   PetscFunctionReturn(0);
2646 }
2647 
/* MatMatTransposeMultSymbolic_MPIDense_MPIDense - symbolic phase of C = A*B^T for two
   MPI dense matrices: sets up C and allocates the communication buffers reused by the
   numeric phase.  Two algorithms are supported, chosen by the product "alg" string:
   "allgatherv" (alg 0, the default) or cyclic point-to-point (alg 1). */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A and B */
  if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);

  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  alg  = flg ? 0 : 1;

  /* setup matrix product C */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    /* double buffer, each large enough for the biggest row block of B that will cycle through */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" */
    /* buf[0] holds the local transposed B block; buf[1] receives the full gathered B^T */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data    = abt;
  C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2700 
/* MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic - numeric phase of C = A*B^T using
   a ring (cyclic) exchange: each rank's row block of B circulates around all ranks; while
   a block is resident, the local GEMM fills the corresponding column block of C, overlapping
   communication (nonblocking send/recv) with computation. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=NULL, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  = (Mat_MatTransMultDense*)C->product->data;
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  /* if the local B block is contiguous (lda == local rows), send it directly;
     otherwise compact it into buf[0] first */
  if (blda == bn) {
    sendbuf = (PetscScalar*)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  /* ring neighbors: send to the previous rank, receive from the next */
  if (size > 1) {
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
      ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
    }

    /* local aseq * sendbuf^T */
    ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2786 
2787 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2788 {
2789   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2790   Mat_MatTransMultDense *abt;
2791   PetscErrorCode        ierr;
2792   MPI_Comm              comm;
2793   PetscMPIInt           size;
2794   PetscScalar           *cv, *sendbuf, *recvbuf;
2795   const PetscScalar     *av,*bv;
2796   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2797   PetscScalar           _DOne=1.0,_DZero=0.0;
2798   PetscBLASInt          cm, cn, ck, alda, clda;
2799 
2800   PetscFunctionBegin;
2801   MatCheckProduct(C,3);
2802   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2803   abt  = (Mat_MatTransMultDense*)C->product->data;
2804   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2805   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2806   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
2807   ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
2808   ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
2809   ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
2810   ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
2811   ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr);
2812   ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
2813   ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
2814   /* copy transpose of B into buf[0] */
2815   bn      = B->rmap->n;
2816   sendbuf = abt->buf[0];
2817   recvbuf = abt->buf[1];
2818   for (k = 0, j = 0; j < bn; j++) {
2819     for (i = 0; i < cK; i++, k++) {
2820       sendbuf[k] = bv[i * blda + j];
2821     }
2822   }
2823   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2824   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
2825   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2826   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2827   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
2828   if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2829   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
2830   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2831   ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
2832   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2833   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2834   PetscFunctionReturn(0);
2835 }
2836 
2837 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2838 {
2839   Mat_MatTransMultDense *abt;
2840   PetscErrorCode        ierr;
2841 
2842   PetscFunctionBegin;
2843   MatCheckProduct(C,3);
2844   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2845   abt = (Mat_MatTransMultDense*)C->product->data;
2846   switch (abt->alg) {
2847   case 1:
2848     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2849     break;
2850   default:
2851     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2852     break;
2853   }
2854   PetscFunctionReturn(0);
2855 }
2856 
2857 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2858 {
2859   PetscErrorCode   ierr;
2860   Mat_MatMultDense *ab = (Mat_MatMultDense*)data;
2861 
2862   PetscFunctionBegin;
2863   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2864   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2865   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2866   ierr = PetscFree(ab);CHKERRQ(ierr);
2867   PetscFunctionReturn(0);
2868 }
2869 
2870 #if defined(PETSC_HAVE_ELEMENTAL)
2871 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2872 {
2873   PetscErrorCode   ierr;
2874   Mat_MatMultDense *ab;
2875 
2876   PetscFunctionBegin;
2877   MatCheckProduct(C,3);
2878   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
2879   ab   = (Mat_MatMultDense*)C->product->data;
2880   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
2881   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
2882   ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
2883   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
2884   PetscFunctionReturn(0);
2885 }
2886 
2887 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2888 {
2889   PetscErrorCode   ierr;
2890   Mat              Ae,Be,Ce;
2891   Mat_MatMultDense *ab;
2892 
2893   PetscFunctionBegin;
2894   MatCheckProduct(C,4);
2895   if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2896   /* check local size of A and B */
2897   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2898     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2899   }
2900 
2901   /* create elemental matrices Ae and Be */
2902   ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr);
2903   ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2904   ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr);
2905   ierr = MatSetUp(Ae);CHKERRQ(ierr);
2906   ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2907 
2908   ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr);
2909   ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
2910   ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
2911   ierr = MatSetUp(Be);CHKERRQ(ierr);
2912   ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2913 
2914   /* compute symbolic Ce = Ae*Be */
2915   ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr);
2916   ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr);
2917 
2918   /* setup C */
2919   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2920   ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr);
2921   ierr = MatSetUp(C);CHKERRQ(ierr);
2922 
2923   /* create data structure for reuse Cdense */
2924   ierr = PetscNew(&ab);CHKERRQ(ierr);
2925   ab->Ae = Ae;
2926   ab->Be = Be;
2927   ab->Ce = Ce;
2928 
2929   C->product->data    = ab;
2930   C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
2931   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2932   PetscFunctionReturn(0);
2933 }
2934 #endif
2935 /* ----------------------------------------------- */
2936 #if defined(PETSC_HAVE_ELEMENTAL)
2937 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2938 {
2939   PetscFunctionBegin;
2940   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2941   C->ops->productsymbolic = MatProductSymbolic_AB;
2942   PetscFunctionReturn(0);
2943 }
2944 #endif
2945 
2946 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2947 {
2948   Mat_Product *product = C->product;
2949   Mat         A = product->A,B=product->B;
2950 
2951   PetscFunctionBegin;
2952   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2953     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2954   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2955   C->ops->productsymbolic = MatProductSymbolic_AtB;
2956   PetscFunctionReturn(0);
2957 }
2958 
/* Select the algorithm for the ABt product (allgatherv or cyclic) from defaults and
   runtime options, then install the symbolic routines on C. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;
  const char     *algTypes[2] = {"allgatherv","cyclic"};  /* index 0 is the default */
  PetscInt       alg,nalg = 2;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  /* if the product is still on "default", record the concrete default choice */
  ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  /* Get runtime option; the option name depends on which API created the product */
  if (product->api_user) {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  } else {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  }
  /* flg is set only when the user supplied the option; then override the algorithm */
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}
2993 
2994 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2995 {
2996   PetscErrorCode ierr;
2997   Mat_Product    *product = C->product;
2998 
2999   PetscFunctionBegin;
3000   switch (product->type) {
3001 #if defined(PETSC_HAVE_ELEMENTAL)
3002   case MATPRODUCT_AB:
3003     ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
3004     break;
3005 #endif
3006   case MATPRODUCT_AtB:
3007     ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
3008     break;
3009   case MATPRODUCT_ABt:
3010     ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
3011     break;
3012   default:
3013     break;
3014   }
3015   PetscFunctionReturn(0);
3016 }
3017