xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 002bc6da1411a4d48d0ee198a7e40f4808f8958e)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11 
12       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
13               matrix that represents the operator. For sequential matrices it returns itself.
14 
15     Input Parameter:
16 .      A - the Seq or MPI dense matrix
17 
18     Output Parameter:
19 .      B - the inner matrix
20 
21     Level: intermediate
22 
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25 {
26   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
27   PetscErrorCode ierr;
28   PetscBool      flg;
29 
30   PetscFunctionBegin;
31   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
32   PetscValidPointer(B,2);
33   ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
34   if (flg) *B = mat->A;
35   else {
36     ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg);CHKERRQ(ierr);
37     if (!flg) SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
38     *B = A;
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
44 {
45   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
46   PetscErrorCode ierr;
47   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
48 
49   PetscFunctionBegin;
50   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
51   lrow = row - rstart;
52   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
53   PetscFunctionReturn(0);
54 }
55 
56 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
57 {
58   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
59   PetscErrorCode ierr;
60   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
61 
62   PetscFunctionBegin;
63   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
64   lrow = row - rstart;
65   ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
66   PetscFunctionReturn(0);
67 }
68 
/* Return the square on-diagonal block of the parallel dense matrix as a
   sequential dense matrix that aliases A's local storage.  The block is built
   once and cached on A under the name "DiagonalBlock". */
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr);
    if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    /* B shares A's storage, starting at the first column of the diagonal block;
       NOTE(review): the m*rstart offset assumes column stride (lda) == m -- confirm */
    ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* PetscObjectCompose takes its own reference, so we may drop ours;
       the cached block lives exactly as long as A does */
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
102 
/* Insert or add a logically dense block of values.  Locally owned rows go
   straight into the sequential block; off-process rows are parked in the stash
   for communication during assembly (unless stashing is disabled). */
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are silently skipped */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* row-major input: row i of v is contiguous at v + i*n */
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* column-major input: entry (i,j) lives at v[i + j*m] */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: stash it until MatAssemblyBegin/End communicate it */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
136 
/* Fetch the entries (idxm[i],idxn[j]) into v (row-major).  Only locally owned
   rows may be queried; negative row/column indices are skipped. */
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      /* query the inner sequential block with the shifted (local) row index */
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
158 
159 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
160 {
161   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
162   PetscErrorCode ierr;
163 
164   PetscFunctionBegin;
165   ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
166   PetscFunctionReturn(0);
167 }
168 
/* Set the leading dimension of the local storage.  If the inner sequential
   matrix does not exist yet (preallocation has not run), create it here so
   the LDA can be recorded before the values array is allocated. */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->A) {
    if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
    ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->A);CHKERRQ(ierr);
    /* the local block holds all N global columns of the locally owned rows */
    ierr = MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
    /* match the inner type to the outer (CUDA vs host) variant */
    ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  }
  ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
189 
190 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
191 {
192   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
193   PetscErrorCode ierr;
194 
195   PetscFunctionBegin;
196   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
197   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
198   PetscFunctionReturn(0);
199 }
200 
201 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
202 {
203   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
204   PetscErrorCode ierr;
205 
206   PetscFunctionBegin;
207   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
208   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
209   PetscFunctionReturn(0);
210 }
211 
212 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
213 {
214   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
215   PetscErrorCode ierr;
216 
217   PetscFunctionBegin;
218   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
219   ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr);
220   PetscFunctionReturn(0);
221 }
222 
223 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
224 {
225   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
226   PetscErrorCode ierr;
227 
228   PetscFunctionBegin;
229   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
230   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
231   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
232   PetscFunctionReturn(0);
233 }
234 
235 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
236 {
237   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
238   PetscErrorCode ierr;
239 
240   PetscFunctionBegin;
241   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
242   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
243   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
244   PetscFunctionReturn(0);
245 }
246 
247 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
248 {
249   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
250   PetscErrorCode ierr;
251 
252   PetscFunctionBegin;
253   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
254   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
255   ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr);
256   PetscFunctionReturn(0);
257 }
258 
/* Extract the parallel submatrix A[isrow,iscol].  The column index set is
   gathered globally (each rank stores every requested column of its rows);
   the row index set must follow A's row partitioning -- no redistribution. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
  PetscErrorCode    ierr;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  /* every rank needs the complete column list, since its local block stores all columns */
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr);
  for (i=0; i<Ncols; i++) {
    const PetscScalar *av = v + lda*icol[i]; /* start of source column icol[i] */
    for (j=0; j<nrows; j++) {
      /* NOTE(review): writing bv contiguously assumes the destination lda == nrows -- confirm */
      *bv++ = av[irow[j] - rstart];
    }
  }
  ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr);

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
328 
329 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
330 {
331   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
332   PetscErrorCode ierr;
333 
334   PetscFunctionBegin;
335   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
336   PetscFunctionReturn(0);
337 }
338 
339 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
340 {
341   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
342   PetscErrorCode ierr;
343 
344   PetscFunctionBegin;
345   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
346   PetscFunctionReturn(0);
347 }
348 
349 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
350 {
351   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
352   PetscErrorCode ierr;
353 
354   PetscFunctionBegin;
355   ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr);
356   PetscFunctionReturn(0);
357 }
358 
/* Begin assembly: start communicating stashed off-process entries to their
   owning ranks.  A no-op when stashing is disabled or the user declared there
   are no off-process entries. */
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
373 
/* Finish assembly: drain the stash messages, inserting each run of values that
   share a row with a single MatSetValues call, then assemble the inner block
   and (on first final assembly) build the MatMult communication structures. */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break; /* no more incoming messages */

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* first final assembly: create the scatter (lvec/Mvctx) used by MatMult */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
412 
413 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
414 {
415   PetscErrorCode ierr;
416   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
417 
418   PetscFunctionBegin;
419   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
420   PetscFunctionReturn(0);
421 }
422 
/* Zero the listed rows (global indices; only locally owned ones are acted on),
   optionally placing diag on the diagonal and adjusting b from x. */
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr);
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr);
  }
  /* zero the rows in the local block; the diagonal (if any) is set below */
  ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (diag != 0.0) {
    Vec d;

    /* NOTE(review): this writes diag onto EVERY diagonal entry, not only the
       zeroed rows -- confirm this is the intended semantics for dense */
    ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
    ierr = VecSet(d,diag);CHKERRQ(ierr);
    ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecDestroy(&d);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
455 
456 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
457 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
458 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
459 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
460 
/* yy = A*xx.  The needed entries of xx are broadcast into the work vector
   lvec through the star forest, then the sequential kernel is applied. */
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  /* memtype-aware access lets the SF move data directly between host/device buffers */
  ierr = VecGetArrayReadInPlace_Internal(xx,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace_Internal(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
479 
/* zz = yy + A*xx.  Same communication pattern as MatMult_MPIDense, but the
   local kernel is the multiply-add variant. */
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  /* memtype-aware access lets the SF move data directly between host/device buffers */
  ierr = VecGetArrayReadInPlace_Internal(xx,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace_Internal(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
498 
/* yy = A^T*xx.  Apply the local transpose kernel into the work vector, then
   sum the partial results across ranks with an SF reduction into yy. */
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  /* yy accumulates reduced contributions, so it must start at zero */
  ierr = VecSet(yy,0.0);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadInPlace_Internal(a->lvec,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace_Internal(yy,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
518 
/* zz = yy + A^T*xx.  Seed zz with yy, then reduce the local transpose-product
   contributions into it, mirroring MatMultTranspose_MPIDense. */
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  /* start from yy so the SUM reduction below produces yy + A^T*xx */
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadInPlace_Internal(a->lvec,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace_Internal(zz,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
538 
539 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
540 {
541   Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
542   PetscErrorCode    ierr;
543   PetscInt          lda,len,i,n,m = A->rmap->n,radd;
544   PetscScalar       *x,zero = 0.0;
545   const PetscScalar *av;
546 
547   PetscFunctionBegin;
548   ierr = VecSet(v,zero);CHKERRQ(ierr);
549   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
550   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
551   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
552   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
553   radd = A->rmap->rstart*m;
554   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
555   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
556   for (i=0; i<len; i++) {
557     x[i] = av[radd + i*lda + i];
558   }
559   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
560   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
561   PetscFunctionReturn(0);
562 }
563 
/* Destroy the parallel dense matrix: free the stash, the inner sequential
   block, the communication objects, the cached column vector/sub-matrix, and
   detach every composed method so the object can be retyped cleanly. */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  /* destroying with an outstanding column-vector or sub-matrix view is a user ordering error */
  if (mdn->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (mdn->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);

  /* detach all type-specific composed methods */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
#if defined (PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
621 
622 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
623 
624 #include <petscdraw.h>
/* View a parallel dense matrix through an ASCII, draw, or socket viewer.
   Info formats print per-rank statistics only; otherwise the whole matrix is
   gathered onto rank 0 and viewed through its sequential block. */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank summary only; no entries are printed */
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    /* rank 0 owns everything; all other ranks own zero rows */
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* rank 0 views its (now complete) local block through a sub-viewer */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
707 
708 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
709 {
710   PetscErrorCode ierr;
711   PetscBool      iascii,isbinary,isdraw,issocket;
712 
713   PetscFunctionBegin;
714   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
715   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
716   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
717   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
718 
719   if (iascii || issocket || isdraw) {
720     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
721   } else if (isbinary) {
722     ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
723   }
724   PetscFunctionReturn(0);
725 }
726 
727 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
728 {
729   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
730   Mat            mdn  = mat->A;
731   PetscErrorCode ierr;
732   PetscLogDouble isend[5],irecv[5];
733 
734   PetscFunctionBegin;
735   info->block_size = 1.0;
736 
737   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
738 
739   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
740   isend[3] = info->memory;  isend[4] = info->mallocs;
741   if (flag == MAT_LOCAL) {
742     info->nz_used      = isend[0];
743     info->nz_allocated = isend[1];
744     info->nz_unneeded  = isend[2];
745     info->memory       = isend[3];
746     info->mallocs      = isend[4];
747   } else if (flag == MAT_GLOBAL_MAX) {
748     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
749 
750     info->nz_used      = irecv[0];
751     info->nz_allocated = irecv[1];
752     info->nz_unneeded  = irecv[2];
753     info->memory       = irecv[3];
754     info->mallocs      = irecv[4];
755   } else if (flag == MAT_GLOBAL_SUM) {
756     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
757 
758     info->nz_used      = irecv[0];
759     info->nz_allocated = irecv[1];
760     info->nz_unneeded  = irecv[2];
761     info->memory       = irecv[3];
762     info->mallocs      = irecv[4];
763   }
764   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
765   info->fill_ratio_needed = 0;
766   info->factor_mallocs    = 0;
767   PetscFunctionReturn(0);
768 }
769 
770 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
771 {
772   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
773   PetscErrorCode ierr;
774 
775   PetscFunctionBegin;
776   switch (op) {
777   case MAT_NEW_NONZERO_LOCATIONS:
778   case MAT_NEW_NONZERO_LOCATION_ERR:
779   case MAT_NEW_NONZERO_ALLOCATION_ERR:
780     MatCheckPreallocated(A,1);
781     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
782     break;
783   case MAT_ROW_ORIENTED:
784     MatCheckPreallocated(A,1);
785     a->roworiented = flg;
786     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
787     break;
788   case MAT_NEW_DIAGONALS:
789   case MAT_KEEP_NONZERO_PATTERN:
790   case MAT_USE_HASH_TABLE:
791   case MAT_SORTED_FULL:
792     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
793     break;
794   case MAT_IGNORE_OFF_PROC_ENTRIES:
795     a->donotstash = flg;
796     break;
797   case MAT_SYMMETRIC:
798   case MAT_STRUCTURALLY_SYMMETRIC:
799   case MAT_HERMITIAN:
800   case MAT_SYMMETRY_ETERNAL:
801   case MAT_IGNORE_LOWER_TRIANGULAR:
802   case MAT_IGNORE_ZERO_ENTRIES:
803     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
804     break;
805   default:
806     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
807   }
808   PetscFunctionReturn(0);
809 }
810 
811 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
812 {
813   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
814   const PetscScalar *l;
815   PetscScalar       x,*v,*vv,*r;
816   PetscErrorCode    ierr;
817   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;
818 
819   PetscFunctionBegin;
820   ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
821   ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
822   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
823   if (ll) {
824     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
825     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2);
826     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
827     for (i=0; i<m; i++) {
828       x = l[i];
829       v = vv + i;
830       for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
831     }
832     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
833     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
834   }
835   if (rr) {
836     const PetscScalar *ar;
837 
838     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
839     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
840     ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
841     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
842     ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
843     ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
844     ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr);
845     for (i=0; i<n; i++) {
846       x = r[i];
847       v = vv + i*lda;
848       for (j=0; j<m; j++) (*v++) *= x;
849     }
850     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
851     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
852   }
853   ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr);
854   PetscFunctionReturn(0);
855 }
856 
/*
   MatNorm_MPIDense - computes a matrix norm of a parallel dense matrix.

   Uniprocessor case delegates to the sequential implementation.  Otherwise:
     NORM_FROBENIUS - each rank sums |a_ij|^2 over its local block, the sums
                      are added across ranks, then the square root is taken;
     NORM_1         - max column sum: per-rank partial column sums of length
                      N are added across ranks, then the maximum is taken;
     NORM_INFINITY  - max row sum: rows are not split across ranks, so the
                      local row-norm maxima are reduced with MPI_MAX;
     NORM_2 is not supported.

   NOTE(review): the Frobenius and 1-norm loops walk the local array as a
   contiguous block of cmap->n*rmap->n entries, i.e. they assume lda equals
   the local row count — confirm for matrices created with a user-supplied
   leading dimension.
*/
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,j;
  PetscMPIInt       size;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr);
  v    = av;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of |a_ij|^2 over the local block */
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      /* tmp[j] accumulates this rank's contribution to column j's 1-norm */
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = av;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      /* the 1-norm is the largest of the global column sums */
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
905 
906 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
907 {
908   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
909   Mat            B;
910   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
911   PetscErrorCode ierr;
912   PetscInt       j,i,lda;
913   PetscScalar    *v;
914 
915   PetscFunctionBegin;
916   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
917     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
918     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
919     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
920     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
921   } else B = *matout;
922 
923   m    = a->A->rmap->n; n = a->A->cmap->n;
924   ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
925   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
926   ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
927   for (i=0; i<m; i++) rwork[i] = rstart + i;
928   for (j=0; j<n; j++) {
929     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
930     v   += lda;
931   }
932   ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
933   ierr = PetscFree(rwork);CHKERRQ(ierr);
934   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
935   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
936   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
937     *matout = B;
938   } else {
939     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
940   }
941   PetscFunctionReturn(0);
942 }
943 
944 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
945 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
946 
947 PetscErrorCode MatSetUp_MPIDense(Mat A)
948 {
949   PetscErrorCode ierr;
950 
951   PetscFunctionBegin;
952   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
953   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
954   if (!A->preallocated) {
955     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
956   }
957   PetscFunctionReturn(0);
958 }
959 
960 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
961 {
962   PetscErrorCode ierr;
963   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
964 
965   PetscFunctionBegin;
966   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
967   PetscFunctionReturn(0);
968 }
969 
970 PetscErrorCode MatConjugate_MPIDense(Mat mat)
971 {
972   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
973   PetscErrorCode ierr;
974 
975   PetscFunctionBegin;
976   ierr = MatConjugate(a->A);CHKERRQ(ierr);
977   PetscFunctionReturn(0);
978 }
979 
980 PetscErrorCode MatRealPart_MPIDense(Mat A)
981 {
982   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
983   PetscErrorCode ierr;
984 
985   PetscFunctionBegin;
986   ierr = MatRealPart(a->A);CHKERRQ(ierr);
987   PetscFunctionReturn(0);
988 }
989 
990 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
991 {
992   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
993   PetscErrorCode ierr;
994 
995   PetscFunctionBegin;
996   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
997   PetscFunctionReturn(0);
998 }
999 
1000 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
1001 {
1002   PetscErrorCode ierr;
1003   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1004 
1005   PetscFunctionBegin;
1006   if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
1007   if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
1008   ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1013 
1014 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1015 {
1016   PetscErrorCode ierr;
1017   PetscInt       i,n;
1018   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1019   PetscReal      *work;
1020 
1021   PetscFunctionBegin;
1022   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1023   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1024   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1025   if (type == NORM_2) {
1026     for (i=0; i<n; i++) work[i] *= work[i];
1027   }
1028   if (type == NORM_INFINITY) {
1029     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1030   } else {
1031     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1032   }
1033   ierr = PetscFree(work);CHKERRQ(ierr);
1034   if (type == NORM_2) {
1035     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1036   }
1037   PetscFunctionReturn(0);
1038 }
1039 
1040 #if defined(PETSC_HAVE_CUDA)
1041 static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1042 {
1043   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1044   PetscErrorCode ierr;
1045   PetscInt       lda;
1046 
1047   PetscFunctionBegin;
1048   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1049   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1050   if (!a->cvec) {
1051     ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1052     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
1053   }
1054   a->vecinuse = col + 1;
1055   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1056   ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1057   ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1058   *v   = a->cvec;
1059   PetscFunctionReturn(0);
1060 }
1061 
1062 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1063 {
1064   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1065   PetscErrorCode ierr;
1066 
1067   PetscFunctionBegin;
1068   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1069   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1070   a->vecinuse = 0;
1071   ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1072   ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
1073   *v   = NULL;
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1078 {
1079   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1080   PetscInt       lda;
1081   PetscErrorCode ierr;
1082 
1083   PetscFunctionBegin;
1084   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1085   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1086   if (!a->cvec) {
1087     ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1088     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
1089   }
1090   a->vecinuse = col + 1;
1091   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1092   ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1093   ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1094   ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
1095   *v   = a->cvec;
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1100 {
1101   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1102   PetscErrorCode ierr;
1103 
1104   PetscFunctionBegin;
1105   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1106   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1107   a->vecinuse = 0;
1108   ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1109   ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
1110   ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
1111   *v   = NULL;
1112   PetscFunctionReturn(0);
1113 }
1114 
1115 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1116 {
1117   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1118   PetscErrorCode ierr;
1119   PetscInt       lda;
1120 
1121   PetscFunctionBegin;
1122   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1123   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1124   if (!a->cvec) {
1125     ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1126     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
1127   }
1128   a->vecinuse = col + 1;
1129   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1130   ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1131   ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1132   *v   = a->cvec;
1133   PetscFunctionReturn(0);
1134 }
1135 
1136 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1137 {
1138   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1139   PetscErrorCode ierr;
1140 
1141   PetscFunctionBegin;
1142   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1143   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1144   a->vecinuse = 0;
1145   ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1146   ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
1147   *v   = NULL;
1148   PetscFunctionReturn(0);
1149 }
1150 
1151 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1152 {
1153   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1154   PetscErrorCode ierr;
1155 
1156   PetscFunctionBegin;
1157   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1158   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1159   ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
1160   PetscFunctionReturn(0);
1161 }
1162 
1163 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1164 {
1165   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1166   PetscErrorCode ierr;
1167 
1168   PetscFunctionBegin;
1169   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1170   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1171   ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
1172   PetscFunctionReturn(0);
1173 }
1174 
1175 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1176 {
1177   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1178   PetscErrorCode ierr;
1179 
1180   PetscFunctionBegin;
1181   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1182   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1183   ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr);
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1188 {
1189   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1190   PetscErrorCode ierr;
1191 
1192   PetscFunctionBegin;
1193   ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1198 {
1199   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1200   PetscErrorCode ierr;
1201 
1202   PetscFunctionBegin;
1203   ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
1204   PetscFunctionReturn(0);
1205 }
1206 
1207 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1208 {
1209   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1210   PetscErrorCode ierr;
1211 
1212   PetscFunctionBegin;
1213   ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
1214   PetscFunctionReturn(0);
1215 }
1216 
1217 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1218 {
1219   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1220   PetscErrorCode ierr;
1221 
1222   PetscFunctionBegin;
1223   ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
1224   PetscFunctionReturn(0);
1225 }
1226 
1227 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1228 {
1229   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1230   PetscErrorCode ierr;
1231 
1232   PetscFunctionBegin;
1233   ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
1234   PetscFunctionReturn(0);
1235 }
1236 
1237 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1238 {
1239   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1240   PetscErrorCode ierr;
1241 
1242   PetscFunctionBegin;
1243   ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
1244   PetscFunctionReturn(0);
1245 }
1246 
1247 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1248 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1249 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
1250 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1251 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1252 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
1253 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);
1254 
/*
   MatBindToCPU_MPIDenseCUDA - pins the matrix to (or releases it from) CPU
   execution.

   Propagates the bind flag to the local sequential block and to the cached
   column-submatrix, and swaps the composed column-vector accessors between
   the CUDA and plain MPIDense implementations.  When unbinding, cached
   helper objects (cvec/cmat) that are not of the CUDA type are destroyed so
   they will be recreated lazily with the right type.  Refused while a
   column vector or submatrix is checked out.
*/
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (d->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (d->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* drop cached helpers of the wrong type; they are recreated on demand */
    ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
    }
    ierr = PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = MatDestroy(&d->cmat);CHKERRQ(ierr);
    }
    /* install the CUDA column-vector accessors */
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
  } else {
    /* install the host (plain MPIDense) column-vector accessors */
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  }
  if (d->cmat) {
    ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1297 
1298 PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
1299 {
1300   Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
1301   PetscErrorCode ierr;
1302   PetscBool      iscuda;
1303 
1304   PetscFunctionBegin;
1305   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1306   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
1307   if (!iscuda) PetscFunctionReturn(0);
1308   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
1309   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
1310   if (!d->A) {
1311     ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
1312     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
1313     ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
1314   }
1315   ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
1316   ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
1317   A->preallocated = PETSC_TRUE;
1318   PetscFunctionReturn(0);
1319 }
1320 #endif
1321 
1322 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1323 {
1324   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1325   PetscErrorCode ierr;
1326 
1327   PetscFunctionBegin;
1328   ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
1329   PetscFunctionReturn(0);
1330 }
1331 
1332 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
1333 {
1334   PetscFunctionBegin;
1335   *missing = PETSC_FALSE;
1336   PetscFunctionReturn(0);
1337 }
1338 
1339 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1340 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1341 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1342 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1343 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
1344 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);
1345 
1346 /* -------------------------------------------------------------------*/
1347 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1348                                         MatGetRow_MPIDense,
1349                                         MatRestoreRow_MPIDense,
1350                                         MatMult_MPIDense,
1351                                 /*  4*/ MatMultAdd_MPIDense,
1352                                         MatMultTranspose_MPIDense,
1353                                         MatMultTransposeAdd_MPIDense,
1354                                         NULL,
1355                                         NULL,
1356                                         NULL,
1357                                 /* 10*/ NULL,
1358                                         NULL,
1359                                         NULL,
1360                                         NULL,
1361                                         MatTranspose_MPIDense,
1362                                 /* 15*/ MatGetInfo_MPIDense,
1363                                         MatEqual_MPIDense,
1364                                         MatGetDiagonal_MPIDense,
1365                                         MatDiagonalScale_MPIDense,
1366                                         MatNorm_MPIDense,
1367                                 /* 20*/ MatAssemblyBegin_MPIDense,
1368                                         MatAssemblyEnd_MPIDense,
1369                                         MatSetOption_MPIDense,
1370                                         MatZeroEntries_MPIDense,
1371                                 /* 24*/ MatZeroRows_MPIDense,
1372                                         NULL,
1373                                         NULL,
1374                                         NULL,
1375                                         NULL,
1376                                 /* 29*/ MatSetUp_MPIDense,
1377                                         NULL,
1378                                         NULL,
1379                                         MatGetDiagonalBlock_MPIDense,
1380                                         NULL,
1381                                 /* 34*/ MatDuplicate_MPIDense,
1382                                         NULL,
1383                                         NULL,
1384                                         NULL,
1385                                         NULL,
1386                                 /* 39*/ MatAXPY_MPIDense,
1387                                         MatCreateSubMatrices_MPIDense,
1388                                         NULL,
1389                                         MatGetValues_MPIDense,
1390                                         NULL,
1391                                 /* 44*/ NULL,
1392                                         MatScale_MPIDense,
1393                                         MatShift_Basic,
1394                                         NULL,
1395                                         NULL,
1396                                 /* 49*/ MatSetRandom_MPIDense,
1397                                         NULL,
1398                                         NULL,
1399                                         NULL,
1400                                         NULL,
1401                                 /* 54*/ NULL,
1402                                         NULL,
1403                                         NULL,
1404                                         NULL,
1405                                         NULL,
1406                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1407                                         MatDestroy_MPIDense,
1408                                         MatView_MPIDense,
1409                                         NULL,
1410                                         NULL,
1411                                 /* 64*/ NULL,
1412                                         NULL,
1413                                         NULL,
1414                                         NULL,
1415                                         NULL,
1416                                 /* 69*/ NULL,
1417                                         NULL,
1418                                         NULL,
1419                                         NULL,
1420                                         NULL,
1421                                 /* 74*/ NULL,
1422                                         NULL,
1423                                         NULL,
1424                                         NULL,
1425                                         NULL,
1426                                 /* 79*/ NULL,
1427                                         NULL,
1428                                         NULL,
1429                                         NULL,
1430                                 /* 83*/ MatLoad_MPIDense,
1431                                         NULL,
1432                                         NULL,
1433                                         NULL,
1434                                         NULL,
1435                                         NULL,
1436                                 /* 89*/ NULL,
1437                                         NULL,
1438                                         NULL,
1439                                         NULL,
1440                                         NULL,
1441                                 /* 94*/ NULL,
1442                                         NULL,
1443                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1444                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1445                                         NULL,
1446                                 /* 99*/ MatProductSetFromOptions_MPIDense,
1447                                         NULL,
1448                                         NULL,
1449                                         MatConjugate_MPIDense,
1450                                         NULL,
1451                                 /*104*/ NULL,
1452                                         MatRealPart_MPIDense,
1453                                         MatImaginaryPart_MPIDense,
1454                                         NULL,
1455                                         NULL,
1456                                 /*109*/ NULL,
1457                                         NULL,
1458                                         NULL,
1459                                         MatGetColumnVector_MPIDense,
1460                                         MatMissingDiagonal_MPIDense,
1461                                 /*114*/ NULL,
1462                                         NULL,
1463                                         NULL,
1464                                         NULL,
1465                                         NULL,
1466                                 /*119*/ NULL,
1467                                         NULL,
1468                                         NULL,
1469                                         NULL,
1470                                         NULL,
1471                                 /*124*/ NULL,
1472                                         MatGetColumnNorms_MPIDense,
1473                                         NULL,
1474                                         NULL,
1475                                         NULL,
1476                                 /*129*/ NULL,
1477                                         NULL,
1478                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1479                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1480                                         NULL,
1481                                 /*134*/ NULL,
1482                                         NULL,
1483                                         NULL,
1484                                         NULL,
1485                                         NULL,
1486                                 /*139*/ NULL,
1487                                         NULL,
1488                                         NULL,
1489                                         NULL,
1490                                         NULL,
1491                                         MatCreateMPIMatConcatenateSeqMat_MPIDense,
1492                                 /*145*/ NULL,
1493                                         NULL,
1494                                         NULL
1495 };
1496 
1497 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1498 {
1499   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1500   PetscBool      iscuda;
1501   PetscErrorCode ierr;
1502 
1503   PetscFunctionBegin;
1504   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1505   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1506   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1507   if (!a->A) {
1508     ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1509     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1510     ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1511   }
1512   ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
1513   ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
1514   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1515   mat->preallocated = PETSC_TRUE;
1516   PetscFunctionReturn(0);
1517 }
1518 
1519 #if defined(PETSC_HAVE_ELEMENTAL)
1520 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1521 {
1522   Mat            mat_elemental;
1523   PetscErrorCode ierr;
1524   PetscScalar    *v;
1525   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1526 
1527   PetscFunctionBegin;
1528   if (reuse == MAT_REUSE_MATRIX) {
1529     mat_elemental = *newmat;
1530     ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
1531   } else {
1532     ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
1533     ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1534     ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
1535     ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
1536     ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1537   }
1538 
1539   ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
1540   for (i=0; i<N; i++) cols[i] = i;
1541   for (i=0; i<m; i++) rows[i] = rstart + i;
1542 
1543   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1544   ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
1545   ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
1546   ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1547   ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1548   ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
1549   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1550 
1551   if (reuse == MAT_INPLACE_MATRIX) {
1552     ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
1553   } else {
1554     *newmat = mat_elemental;
1555   }
1556   PetscFunctionReturn(0);
1557 }
1558 #endif
1559 
1560 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1561 {
1562   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1563   PetscErrorCode ierr;
1564 
1565   PetscFunctionBegin;
1566   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1567   PetscFunctionReturn(0);
1568 }
1569 
1570 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1571 {
1572   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1573   PetscErrorCode ierr;
1574 
1575   PetscFunctionBegin;
1576   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1577   PetscFunctionReturn(0);
1578 }
1579 
/* Stack one sequential dense matrix per rank into a single parallel dense matrix on comm:
   each rank's inmat becomes its block of local rows of *outmat.
   n is the number of local columns of the result (or PETSC_DECIDE);
   scall is MAT_INITIAL_MATRIX (create *outmat) or MAT_REUSE_MATRIX (fill existing). */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *mat;
  PetscInt       m,nloc,N;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  /* NOTE(review): nloc is retrieved here but never used below -- confirm intent before removing */
  ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
  }

  /* numeric phase: copy the local rows into the inner sequential matrix of the result */
  mat = (Mat_MPIDense*)(*outmat)->data;
  ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1609 
1610 #if defined(PETSC_HAVE_CUDA)
/* Convert a MATMPIDENSECUDA matrix to a host-only MATMPIDENSE matrix.
   Handles MAT_INITIAL_MATRIX (duplicate), MAT_REUSE_MATRIX (copy into *newmat)
   and MAT_INPLACE_MATRIX (presumably *newmat aliases M and M is retyped -- confirm with callers). */
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  B    = *newmat;
  /* migrate data/operations to the CPU, then make host vectors the default */
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
  /* composing NULL removes all the CUDA-specific methods from the object */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* retype the inner sequential matrix to host dense and rebuild the multiply machinery */
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}
1652 
/* Convert a host MATMPIDENSE matrix to MATMPIDENSECUDA.
   Handles MAT_INITIAL_MATRIX (duplicate), MAT_REUSE_MATRIX (copy into *newmat)
   and MAT_INPLACE_MATRIX (presumably *newmat aliases M and M is retyped -- confirm with callers). */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  B    = *newmat;
  /* make CUDA vectors the default and install the CUDA-specific methods */
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* retype the inner sequential matrix to CUDA dense and rebuild the multiply machinery */
    ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
1697 #endif
1698 
1699 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1700 {
1701   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1702   PetscErrorCode ierr;
1703   PetscInt       lda;
1704 
1705   PetscFunctionBegin;
1706   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1707   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1708   if (!a->cvec) {
1709     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1710   }
1711   a->vecinuse = col + 1;
1712   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1713   ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1714   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1715   *v   = a->cvec;
1716   PetscFunctionReturn(0);
1717 }
1718 
1719 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1720 {
1721   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1722   PetscErrorCode ierr;
1723 
1724   PetscFunctionBegin;
1725   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1726   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1727   a->vecinuse = 0;
1728   ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1729   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1730   *v   = NULL;
1731   PetscFunctionReturn(0);
1732 }
1733 
1734 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1735 {
1736   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1737   PetscErrorCode ierr;
1738   PetscInt       lda;
1739 
1740   PetscFunctionBegin;
1741   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1742   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1743   if (!a->cvec) {
1744     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1745   }
1746   a->vecinuse = col + 1;
1747   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1748   ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1749   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1750   ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
1751   *v   = a->cvec;
1752   PetscFunctionReturn(0);
1753 }
1754 
1755 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1756 {
1757   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1758   PetscErrorCode ierr;
1759 
1760   PetscFunctionBegin;
1761   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1762   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1763   a->vecinuse = 0;
1764   ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1765   ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
1766   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1767   *v   = NULL;
1768   PetscFunctionReturn(0);
1769 }
1770 
1771 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1772 {
1773   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1774   PetscErrorCode ierr;
1775   PetscInt       lda;
1776 
1777   PetscFunctionBegin;
1778   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1779   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1780   if (!a->cvec) {
1781     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1782   }
1783   a->vecinuse = col + 1;
1784   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1785   ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1786   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1787   *v   = a->cvec;
1788   PetscFunctionReturn(0);
1789 }
1790 
1791 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1792 {
1793   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1794   PetscErrorCode ierr;
1795 
1796   PetscFunctionBegin;
1797   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1798   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1799   a->vecinuse = 0;
1800   ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1801   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1802   *v   = NULL;
1803   PetscFunctionReturn(0);
1804 }
1805 
/* Expose columns [cbegin,cend) of A as a matrix sharing A's storage.
   The wrapper matrix a->cmat is cached and reused across calls; only one
   sub-matrix (or column vector) may be outstanding at a time.
   Pair with MatDenseRestoreSubMatrix_MPIDense(). */
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscBool      setup = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (a->vecinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cmat) {
    /* first call: create the cached wrapper with A's row layout and cend-cbegin columns */
    setup = PETSC_TRUE;
    ierr = MatCreate(comm,&a->cmat);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr);
    ierr = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  } else if (cend-cbegin != a->cmat->cmap->N) {
    /* cached wrapper exists but has the wrong column count: rebuild its column layout */
    setup = PETSC_TRUE;
    ierr = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  }
  c = (Mat_MPIDense*)a->cmat->data;
  if (c->A) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  /* the wrapper's inner sequential matrix is a view into A's inner matrix */
  ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr);
  if (setup) { /* do we really need this? */
    ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr);
  }
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled = PETSC_TRUE;
  a->matinuse = cbegin + 1; /* stored shifted by one so cbegin 0 still reads as "in use" */
  *v = a->cmat;
  PetscFunctionReturn(0);
}
1845 
1846 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
1847 {
1848   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1849   Mat_MPIDense   *c;
1850   PetscErrorCode ierr;
1851 
1852   PetscFunctionBegin;
1853   if (!a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
1854   if (!a->cmat) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
1855   if (*v != a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
1856   a->matinuse = 0;
1857   c    = (Mat_MPIDense*)a->cmat->data;
1858   ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr);
1859   *v   = NULL;
1860   PetscFunctionReturn(0);
1861 }
1862 
1863 /*MC
1864    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1865 
1866    Options Database Keys:
1867 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1868 
1869   Level: beginner
1870 
1871 .seealso: MatCreateDense()
1872 
1873 M*/
/* Type constructor for MATMPIDENSE: allocates the implementation data, installs
   the operation table, and composes the type-specific methods dispatched by the
   MatDenseXXX()/MatConvert()/MatProduct interfaces. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* dense-array access and column-vector/sub-matrix views */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr);
  /* conversions to external dense packages, compiled in only when available */
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  /* preallocation and matrix-product support */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#endif

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1937 
1938 /*MC
1939    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1940 
1941    Options Database Keys:
1942 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1943 
1944   Level: beginner
1945 
.seealso: MATMPIDENSE, MATSEQDENSECUDA, MATDENSECUDA
1947 
1948 M*/
1949 #if defined(PETSC_HAVE_CUDA)
/* Type constructor for MATMPIDENSECUDA: build a plain MPIDENSE matrix, then
   retype it in place to the CUDA variant. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* make sure the CUDA runtime is usable before any device work */
  ierr = PetscCUDAInitializeCheck();CHKERRQ(ierr);
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1960 #endif
1961 
1962 /*MC
1963    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1964 
1965    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1966    and MATMPIDENSE otherwise.
1967 
1968    Options Database Keys:
1969 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1970 
1971   Level: beginner
1972 
1973 
1974 .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
1975 M*/
1976 
1977 /*MC
1978    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
1979 
1980    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
1981    and MATMPIDENSECUDA otherwise.
1982 
1983    Options Database Keys:
1984 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
1985 
1986   Level: beginner
1987 
1988 .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
1989 M*/
1990 
1991 /*@C
1992    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1993 
1994    Collective
1995 
1996    Input Parameters:
1997 .  B - the matrix
1998 -  data - optional location of matrix data.  Set data=NULL for PETSc
1999    to control all matrix memory allocation.
2000 
2001    Notes:
2002    The dense format is fully compatible with standard Fortran 77
2003    storage by columns.
2004 
2005    The data input variable is intended primarily for Fortran programmers
2006    who wish to allocate their own matrix memory space.  Most users should
2007    set data=NULL.
2008 
2009    Level: intermediate
2010 
2011 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
2012 @*/
PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation if one is composed; no-op otherwise */
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2022 
2023 /*@
2024    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
2025    array provided by the user. This is useful to avoid copying an array
2026    into a matrix
2027 
2028    Not Collective
2029 
2030    Input Parameters:
2031 +  mat - the matrix
2032 -  array - the array in column major order
2033 
2034    Notes:
2035    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
2036    freed when the matrix is destroyed.
2037 
2038    Level: developer
2039 
2040 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
2041 
2042 @*/
PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* errors if the matrix type has no MatDensePlaceArray implementation composed */
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the placed array lives on the host, so the host copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
2056 
2057 /*@
2058    MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()
2059 
2060    Not Collective
2061 
2062    Input Parameters:
2063 .  mat - the matrix
2064 
2065    Notes:
2066    You can only call this after a call to MatDensePlaceArray()
2067 
2068    Level: developer
2069 
2070 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
2071 
2072 @*/
PetscErrorCode  MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* restore the array that was saved when MatDensePlaceArray() was called */
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2083 
2084 /*@
2085    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2086    array provided by the user. This is useful to avoid copying an array
2087    into a matrix
2088 
2089    Not Collective
2090 
2091    Input Parameters:
2092 +  mat - the matrix
2093 -  array - the array in column major order
2094 
2095    Notes:
2096    The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
2097    freed by the user. It will be freed when the matrix is destroyed.
2098 
2099    Level: developer
2100 
2101 .seealso: MatDenseGetArray(), VecReplaceArray()
2102 @*/
PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* permanently replace the array; ownership of array passes to the matrix (see manpage Notes) */
  ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the new array is host memory, so the CPU copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
2116 
2117 #if defined(PETSC_HAVE_CUDA)
2118 /*@C
2119    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
2120    array provided by the user. This is useful to avoid copying an array
2121    into a matrix
2122 
2123    Not Collective
2124 
2125    Input Parameters:
2126 +  mat - the matrix
2127 -  array - the array in column major order
2128 
2129    Notes:
2130    You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
2131    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
2132 
2133    Level: developer
2134 
2135 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
2136 @*/
PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* the placed array is device memory, so the GPU copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2148 
2149 /*@C
2150    MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray()
2151 
2152    Not Collective
2153 
2154    Input Parameters:
2155 .  mat - the matrix
2156 
2157    Notes:
2158    You can only call this after a call to MatDenseCUDAPlaceArray()
2159 
2160    Level: developer
2161 
2162 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()
2163 
2164 @*/
PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* restore the device array that was saved when MatDenseCUDAPlaceArray() was called */
  ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2175 
2176 /*@C
2177    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
2178    array provided by the user. This is useful to avoid copying an array
2179    into a matrix
2180 
2181    Not Collective
2182 
2183    Input Parameters:
2184 +  mat - the matrix
2185 -  array - the array in column major order
2186 
2187    Notes:
2188    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2189    The memory passed in CANNOT be freed by the user. It will be freed
2190    when the matrix is destroyed. The array should respect the matrix leading dimension.
2191 
2192    Level: developer
2193 
2194 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
2195 @*/
PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* permanently replace the device array; ownership of array passes to the matrix (see manpage Notes) */
  ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* the new array is device memory, so the GPU copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2207 
2208 /*@C
2209    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.
2210 
2211    Not Collective
2212 
2213    Input Parameters:
2214 .  A - the matrix
2215 
2216    Output Parameters
2217 .  array - the GPU array in column major order
2218 
2219    Notes:
2220    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
2221 
2222    Level: developer
2223 
2224 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
2225 @*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  /* write access: the caller is expected to modify the values, so bump the object state */
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2236 
2237 /*@C
2238    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().
2239 
2240    Not Collective
2241 
2242    Input Parameters:
2243 +  A - the matrix
2244 -  array - the GPU array in column major order
2245 
   Notes:
   After this call the GPU copy of the matrix is marked as the current (valid) copy.

2248    Level: developer
2249 
2250 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2251 @*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  /* writes happened on the device, so the GPU copy is now the valid one */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2263 
2264 /*@C
2265    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.
2266 
2267    Not Collective
2268 
2269    Input Parameters:
2270 .  A - the matrix
2271 
2272    Output Parameters
2273 .  array - the GPU array in column major order
2274 
2275    Notes:
2276    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2277 
2278    Level: developer
2279 
2280 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2281 @*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* read-only access: no object state increase, unlike the write accessors */
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2291 
2292 /*@C
2293    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2294 
2295    Not Collective
2296 
2297    Input Parameters:
2298 +  A - the matrix
2299 -  array - the GPU array in column major order
2300 
2301    Notes:
2302    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2303 
2304    Level: developer
2305 
2306 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
2307 @*/
2308 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2309 {
2310   PetscErrorCode ierr;
2311 
2312   PetscFunctionBegin;
2313   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2314   PetscFunctionReturn(0);
2315 }
2316 
2317 /*@C
2318    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.
2319 
2320    Not Collective
2321 
2322    Input Parameters:
2323 .  A - the matrix
2324 
2325    Output Parameters
2326 .  array - the GPU array in column major order
2327 
2328    Notes:
2329    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().
2330 
2331    Level: developer
2332 
2333 .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2334 @*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  /* read-write access: assume the caller modifies values, so bump the object state */
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2345 
2346 /*@C
2347    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().
2348 
2349    Not Collective
2350 
2351    Input Parameters:
2352 +  A - the matrix
2353 -  array - the GPU array in column major order
2354 
   Notes:
   After this call the GPU copy of the matrix is marked as the current (valid) copy, and the object state of the matrix is increased.

2357    Level: developer
2358 
2359 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2360 @*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  /* any modifications happened on the device, so the GPU copy is now the valid one */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2372 #endif
2373 
2374 /*@C
2375    MatCreateDense - Creates a matrix in dense format.
2376 
2377    Collective
2378 
2379    Input Parameters:
2380 +  comm - MPI communicator
2381 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2382 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2383 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2384 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2385 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2386    to control all matrix memory allocation.
2387 
2388    Output Parameter:
2389 .  A - the matrix
2390 
2391    Notes:
2392    The dense format is fully compatible with standard Fortran 77
2393    storage by columns.
2394    Note that, although local portions of the matrix are stored in column-major
2395    order, the matrix is partitioned across MPI ranks by row.
2396 
2397    The data input variable is intended primarily for Fortran programmers
2398    who wish to allocate their own matrix memory space.  Most users should
2399    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
2400 
2401    The user MUST specify either the local or global matrix dimensions
2402    (possibly both).
2403 
2404    Level: intermediate
2405 
2406 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
2407 @*/
PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    PetscBool havedata = (PetscBool)!!data;

    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    /* individual ranks may pass NULL; agree globally on whether ANY rank supplied data */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr);
    if (havedata) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    /* single rank: fall back to the sequential dense type */
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2433 
2434 #if defined(PETSC_HAVE_CUDA)
2435 /*@C
2436    MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.
2437 
2438    Collective
2439 
2440    Input Parameters:
2441 +  comm - MPI communicator
2442 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2443 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2444 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2445 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2446 -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
2447    to control matrix memory allocation.
2448 
2449    Output Parameter:
2450 .  A - the matrix
2451 
   Notes:
   The data parameter must either be NULL on every rank of the communicator, or non-NULL on every rank; mixing is an error.

2454    Level: intermediate
2455 
2456 .seealso: MatCreate(), MatCreateDense()
2457 @*/
PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  /* unlike MatCreateDense(), require data to be NULL or non-NULL uniformly across ranks */
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
    ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    /* single rank: fall back to the sequential dense CUDA type */
    ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
    ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2481 #endif
2482 
/* Duplicate a parallel dense matrix: create a new matrix of the same type and
   layout, copy the flags, then duplicate the local sequential block. */
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = NULL;
  ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIDense*)mat->data;

  /* the duplicate is immediately usable; no assembly needed */
  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* share (reference) the row/column layouts rather than copying them */
  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  /* duplicate the local dense block, honoring cpvalues (copy values or not) */
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}
2513 
/* Load a parallel dense matrix from a viewer; supports binary and (if built) HDF5. */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}
2540 
2541 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
2542 {
2543   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2544   Mat            a,b;
2545   PetscBool      flg;
2546   PetscErrorCode ierr;
2547 
2548   PetscFunctionBegin;
2549   a    = matA->A;
2550   b    = matB->A;
2551   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2552   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2553   PetscFunctionReturn(0);
2554 }
2555 
2556 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2557 {
2558   PetscErrorCode        ierr;
2559   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2560 
2561   PetscFunctionBegin;
2562   ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
2563   ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
2564   ierr = PetscFree(atb);CHKERRQ(ierr);
2565   PetscFunctionReturn(0);
2566 }
2567 
2568 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2569 {
2570   PetscErrorCode        ierr;
2571   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2572 
2573   PetscFunctionBegin;
2574   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
2575   ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
2576   ierr = PetscFree(abt);CHKERRQ(ierr);
2577   PetscFunctionReturn(0);
2578 }
2579 
/* Numeric stage of C = A^T * B for MPI dense matrices.
   Each rank computes the full-sized local product aseq^T * bseq, packs it by
   destination rank, then a reduce-scatter sums the contributions so each rank
   ends up with its own rows of C. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts;
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  atb = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf = atb->sendbuf;

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq; reuse the intermediate matrix after the first call */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);

  /* arrange atbarray into sendbuf: for each destination rank, its rows of every column,
     so the reduce-scatter delivers contiguous column-major blocks */
  ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);

  /* sum the per-rank contributions directly into the local values of C */
  ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2624 
2625 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2626 {
2627   PetscErrorCode        ierr;
2628   MPI_Comm              comm;
2629   PetscMPIInt           size;
2630   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
2631   Mat_TransMatMultDense *atb;
2632   PetscBool             cisdense;
2633   PetscInt              i;
2634   const PetscInt        *ranges;
2635 
2636   PetscFunctionBegin;
2637   MatCheckProduct(C,3);
2638   if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2639   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2640   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2641     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2642   }
2643 
2644   /* create matrix product C */
2645   ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr);
2646   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
2647   if (!cisdense) {
2648     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
2649   }
2650   ierr = MatSetUp(C);CHKERRQ(ierr);
2651 
2652   /* create data structure for reuse C */
2653   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2654   ierr = PetscNew(&atb);CHKERRQ(ierr);
2655   cM   = C->rmap->N;
2656   ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);
2657   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
2658   for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
2659 
2660   C->product->data    = atb;
2661   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2662   PetscFunctionReturn(0);
2663 }
2664 
/* Symbolic stage of C = A * B^T for MPI dense matrices: sizes C and allocates
   the exchange buffers for the chosen algorithm ("allgatherv" or "cyclic"). */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A and B: the product contracts over columns */
  if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);

  /* alg 0 = "allgatherv", anything else (1) = "cyclic" */
  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  alg  = flg ? 0 : 1;

  /* setup matrix product C */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" — two ping-pong buffers sized for the largest rank's rows of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" — buf[0] holds local B^T, buf[1] the gathered global B^T */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data    = abt;
  C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2717 
/* Numeric stage of C = A * B^T using the cyclic algorithm: the local rows of B
   travel around the ring of ranks; at each step the rank multiplies its local
   block of A against the B-chunk currently held and writes the corresponding
   columns of C, overlapping communication with the GEMM. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=NULL, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda = 0, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  = (Mat_MatTransMultDense*)C->product->data;
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  if (blda == bn) {
    /* local B block is contiguous; send it in place */
    sendbuf = (PetscScalar*)bv;
  } else {
    /* compact the local B block (drop the leading-dimension padding) into buf[0] */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  if (size > 1) {
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving and sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
      ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
    }

    /* local aseq * sendbuf^T, written into the columns of C owned by recvisfrom */
    ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2803 
2804 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2805 {
2806   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2807   Mat_MatTransMultDense *abt;
2808   PetscErrorCode        ierr;
2809   MPI_Comm              comm;
2810   PetscMPIInt           size;
2811   PetscScalar           *cv, *sendbuf, *recvbuf;
2812   const PetscScalar     *av,*bv;
2813   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2814   PetscScalar           _DOne=1.0,_DZero=0.0;
2815   PetscBLASInt          cm, cn, ck, alda, clda;
2816 
2817   PetscFunctionBegin;
2818   MatCheckProduct(C,3);
2819   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2820   abt  = (Mat_MatTransMultDense*)C->product->data;
2821   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2822   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2823   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
2824   ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
2825   ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
2826   ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
2827   ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
2828   ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr);
2829   ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
2830   ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
2831   /* copy transpose of B into buf[0] */
2832   bn      = B->rmap->n;
2833   sendbuf = abt->buf[0];
2834   recvbuf = abt->buf[1];
2835   for (k = 0, j = 0; j < bn; j++) {
2836     for (i = 0; i < cK; i++, k++) {
2837       sendbuf[k] = bv[i * blda + j];
2838     }
2839   }
2840   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2841   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
2842   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2843   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2844   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
2845   if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2846   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
2847   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2848   ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
2849   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2850   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2851   PetscFunctionReturn(0);
2852 }
2853 
2854 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2855 {
2856   Mat_MatTransMultDense *abt;
2857   PetscErrorCode        ierr;
2858 
2859   PetscFunctionBegin;
2860   MatCheckProduct(C,3);
2861   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2862   abt = (Mat_MatTransMultDense*)C->product->data;
2863   switch (abt->alg) {
2864   case 1:
2865     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2866     break;
2867   default:
2868     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2869     break;
2870   }
2871   PetscFunctionReturn(0);
2872 }
2873 
2874 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2875 {
2876   PetscErrorCode   ierr;
2877   Mat_MatMultDense *ab = (Mat_MatMultDense*)data;
2878 
2879   PetscFunctionBegin;
2880   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2881   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2882   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2883   ierr = PetscFree(ab);CHKERRQ(ierr);
2884   PetscFunctionReturn(0);
2885 }
2886 
2887 #if defined(PETSC_HAVE_ELEMENTAL)
2888 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2889 {
2890   PetscErrorCode   ierr;
2891   Mat_MatMultDense *ab;
2892 
2893   PetscFunctionBegin;
2894   MatCheckProduct(C,3);
2895   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
2896   ab   = (Mat_MatMultDense*)C->product->data;
2897   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
2898   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
2899   ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
2900   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
2901   PetscFunctionReturn(0);
2902 }
2903 
2904 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2905 {
2906   PetscErrorCode   ierr;
2907   Mat              Ae,Be,Ce;
2908   Mat_MatMultDense *ab;
2909 
2910   PetscFunctionBegin;
2911   MatCheckProduct(C,4);
2912   if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2913   /* check local size of A and B */
2914   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2915     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2916   }
2917 
2918   /* create elemental matrices Ae and Be */
2919   ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr);
2920   ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2921   ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr);
2922   ierr = MatSetUp(Ae);CHKERRQ(ierr);
2923   ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2924 
2925   ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr);
2926   ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
2927   ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
2928   ierr = MatSetUp(Be);CHKERRQ(ierr);
2929   ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2930 
2931   /* compute symbolic Ce = Ae*Be */
2932   ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr);
2933   ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr);
2934 
2935   /* setup C */
2936   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2937   ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr);
2938   ierr = MatSetUp(C);CHKERRQ(ierr);
2939 
2940   /* create data structure for reuse Cdense */
2941   ierr = PetscNew(&ab);CHKERRQ(ierr);
2942   ab->Ae = Ae;
2943   ab->Be = Be;
2944   ab->Ce = Ce;
2945 
2946   C->product->data    = ab;
2947   C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
2948   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2949   PetscFunctionReturn(0);
2950 }
2951 #endif
2952 /* ----------------------------------------------- */
2953 #if defined(PETSC_HAVE_ELEMENTAL)
2954 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2955 {
2956   PetscFunctionBegin;
2957   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2958   C->ops->productsymbolic = MatProductSymbolic_AB;
2959   PetscFunctionReturn(0);
2960 }
2961 #endif
2962 
2963 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2964 {
2965   Mat_Product *product = C->product;
2966   Mat         A = product->A,B=product->B;
2967 
2968   PetscFunctionBegin;
2969   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2970     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2971   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2972   C->ops->productsymbolic = MatProductSymbolic_AtB;
2973   PetscFunctionReturn(0);
2974 }
2975 
2976 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2977 {
2978   PetscErrorCode ierr;
2979   Mat_Product    *product = C->product;
2980   const char     *algTypes[2] = {"allgatherv","cyclic"};
2981   PetscInt       alg,nalg = 2;
2982   PetscBool      flg = PETSC_FALSE;
2983 
2984   PetscFunctionBegin;
2985   /* Set default algorithm */
2986   alg = 0; /* default is allgatherv */
2987   ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
2988   if (flg) {
2989     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
2990   }
2991 
2992   /* Get runtime option */
2993   if (product->api_user) {
2994     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
2995     ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
2996     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2997   } else {
2998     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
2999     ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
3000     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3001   }
3002   if (flg) {
3003     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
3004   }
3005 
3006   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
3007   C->ops->productsymbolic          = MatProductSymbolic_ABt;
3008   PetscFunctionReturn(0);
3009 }
3010 
3011 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
3012 {
3013   PetscErrorCode ierr;
3014   Mat_Product    *product = C->product;
3015 
3016   PetscFunctionBegin;
3017   switch (product->type) {
3018 #if defined(PETSC_HAVE_ELEMENTAL)
3019   case MATPRODUCT_AB:
3020     ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
3021     break;
3022 #endif
3023   case MATPRODUCT_AtB:
3024     ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
3025     break;
3026   case MATPRODUCT_ABt:
3027     ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
3028     break;
3029   default:
3030     break;
3031   }
3032   PetscFunctionReturn(0);
3033 }
3034