xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision cd1c4666a9c7786acffe2dfe971f76ff95fb06a8)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11 
12       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
13               matrix that represents the operator. For sequential matrices it returns itself.
14 
15     Input Parameter:
16 .      A - the Seq or MPI dense matrix
17 
18     Output Parameter:
19 .      B - the inner matrix
20 
21     Level: intermediate
22 
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25 {
26   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
27   PetscErrorCode ierr;
28   PetscBool      flg;
29 
30   PetscFunctionBegin;
31   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
32   PetscValidPointer(B,2);
33   ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
34   if (flg) *B = mat->A;
35   else {
36     ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg);CHKERRQ(ierr);
37     if (!flg) SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
38     *B = A;
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
44 {
45   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
46   PetscErrorCode ierr;
47   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
48 
49   PetscFunctionBegin;
50   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
51   lrow = row - rstart;
52   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
53   PetscFunctionReturn(0);
54 }
55 
56 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
57 {
58   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
59   PetscErrorCode ierr;
60   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
61 
62   PetscFunctionBegin;
63   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
64   lrow = row - rstart;
65   ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
66   PetscFunctionReturn(0);
67 }
68 
/* Return the square diagonal block of a parallel dense matrix as a sequential matrix.
   The block is created once, cached on A as the composed object "DiagonalBlock",
   and shares storage with the local data (no copy). */
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  /* look up the cached block first so repeated calls are cheap */
  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr);
    if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    /* the diagonal block starts at column rstart of the local (column-major) data;
       array+m*rstart aliases that storage, so B is a view, not a copy.
       NOTE(review): this offset assumes lda == m for the inner matrix — confirm */
    ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* composing transfers ownership to A; drop our reference afterwards */
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
102 
/* Insert/add a logically dense block of values. Locally owned rows go directly
   into the inner sequential matrix; off-process entries are stashed and
   communicated later during MatAssemblyBegin/End (unless donotstash is set). */
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;  /* negative row indices are silently skipped */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      /* locally owned row: forward to the sequential block with a local index */
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v is stored row by row: row i of the block starts at v+i*n */
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* v is stored column by column: entry (i,j) is at v+i+j*m */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      /* off-process row: record it in the stash for communication at assembly */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
136 
137 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
138 {
139   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
140   PetscErrorCode ierr;
141   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
142 
143   PetscFunctionBegin;
144   for (i=0; i<m; i++) {
145     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
146     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
147     if (idxm[i] >= rstart && idxm[i] < rend) {
148       row = idxm[i] - rstart;
149       for (j=0; j<n; j++) {
150         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
151         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
152         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
153       }
154     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
155   }
156   PetscFunctionReturn(0);
157 }
158 
159 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
160 {
161   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
162   PetscErrorCode ierr;
163 
164   PetscFunctionBegin;
165   ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
166   PetscFunctionReturn(0);
167 }
168 
/* Set the leading dimension of the local storage. If the inner sequential matrix
   has not been created yet (preallocation not called), create it here so the LDA
   can be recorded before the array is allocated. */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->A) {
    if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
    ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
    /* the inner matrix holds ALL columns of the locally owned rows */
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->A);CHKERRQ(ierr);
    ierr = MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
    /* mirror the outer type: CUDA outer matrix gets a CUDA inner matrix */
    ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  }
  ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
189 
190 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
191 {
192   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
193   PetscErrorCode ierr;
194 
195   PetscFunctionBegin;
196   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
197   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
198   PetscFunctionReturn(0);
199 }
200 
201 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
202 {
203   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
204   PetscErrorCode ierr;
205 
206   PetscFunctionBegin;
207   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
208   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
209   PetscFunctionReturn(0);
210 }
211 
212 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
213 {
214   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
215   PetscErrorCode ierr;
216 
217   PetscFunctionBegin;
218   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
219   ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr);
220   PetscFunctionReturn(0);
221 }
222 
223 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
224 {
225   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
226   PetscErrorCode ierr;
227 
228   PetscFunctionBegin;
229   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
230   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
231   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
232   PetscFunctionReturn(0);
233 }
234 
235 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
236 {
237   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
238   PetscErrorCode ierr;
239 
240   PetscFunctionBegin;
241   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
242   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
243   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
244   PetscFunctionReturn(0);
245 }
246 
247 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
248 {
249   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
250   PetscErrorCode ierr;
251 
252   PetscFunctionBegin;
253   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
254   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
255   ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr);
256   PetscFunctionReturn(0);
257 }
258 
/* Extract a parallel dense submatrix defined by (isrow, iscol).
   The row index set must follow the original row distribution: no parallel
   redistribution is performed (see the caveat comment below). */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
  PetscErrorCode    ierr;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  /* every process stores all columns of its rows, so each needs the full column index set */
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr);
  for (i=0; i<Ncols; i++) {
    /* av points at the start of source column icol[i] (column-major, stride lda) */
    const PetscScalar *av = v + lda*icol[i];
    for (j=0; j<nrows; j++) {
      /* destination is written densely column by column; rows are shifted to local indices */
      *bv++ = av[irow[j] - rstart];
    }
  }
  ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr);

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
328 
329 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
330 {
331   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
332   PetscErrorCode ierr;
333 
334   PetscFunctionBegin;
335   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
336   PetscFunctionReturn(0);
337 }
338 
339 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
340 {
341   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
342   PetscErrorCode ierr;
343 
344   PetscFunctionBegin;
345   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
346   PetscFunctionReturn(0);
347 }
348 
349 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
350 {
351   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
352   PetscErrorCode ierr;
353 
354   PetscFunctionBegin;
355   ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr);
356   PetscFunctionReturn(0);
357 }
358 
359 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
360 {
361   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
362   PetscErrorCode ierr;
363   PetscInt       nstash,reallocs;
364 
365   PetscFunctionBegin;
366   if (mdn->donotstash || mat->nooffprocentries) return(0);
367 
368   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
369   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
370   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
371   PetscFunctionReturn(0);
372 }
373 
/* Finish assembly: receive stashed off-process entries, insert them locally,
   assemble the inner sequential matrix, and set up the scatter used by MatMult. */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      /* flg == 0 means no more incoming stash messages */
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  /* propagate the assembly to the sequential block holding the local data */
  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: create lvec and the SF used for MatMult communication */
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
412 
413 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
414 {
415   PetscErrorCode ierr;
416   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
417 
418   PetscFunctionBegin;
419   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
420   PetscFunctionReturn(0);
421 }
422 
/* Zero the given (globally numbered) rows, optionally placing diag on the diagonal
   and fixing the right-hand side b so that x remains a solution for those rows. */
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    /* b[row] = diag * x[row] so the zeroed equations stay consistent with x */
    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr);
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr);
  }
  /* zero the rows in the local block (diagonal handled separately below) */
  ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (diag != 0.0) {
    Vec d;

    /* NOTE(review): this sets the ENTIRE diagonal to diag, not only the zeroed
       rows; that matches replacing zeroed diagonal entries only when the other
       diagonal entries are overwritten identically — confirm intended semantics */
    ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
    ierr = VecSet(d,diag);CHKERRQ(ierr);
    ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecDestroy(&d);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
455 
456 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
457 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
458 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
459 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
460 
/* yy = A*xx: broadcast the global vector xx into the local work vector lvec
   (each process stores all columns of its rows), then do a sequential multiply. */
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  /* InPlace accessors avoid host/device copies for GPU vectors */
  ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  /* local dense multiply produces the owned part of yy */
  ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
478 
/* zz = yy + A*xx: same communication pattern as MatMult_MPIDense, followed by
   a sequential multiply-add on the local block. */
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  /* broadcast the global xx into the local work vector lvec */
  ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
496 
/* yy = A^T*xx: local transpose multiply into lvec, then sum the partial results
   from all processes into yy via an SF reduction. */
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  /* yy accumulates contributions from every rank, so it must start at zero */
  ierr = VecSet(yy,0.0);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(yy,&ay);CHKERRQ(ierr);
  /* reverse of the MatMult broadcast: sum each rank's partial products into the owner */
  ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
515 
/* zz = yy + A^T*xx: seed zz with yy, then accumulate the distributed transpose
   product into it with an SF reduction (same pattern as MatMultTranspose). */
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  /* start from yy so the reduction adds A^T*xx on top of it */
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(zz,&ay);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
534 
535 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
536 {
537   Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
538   PetscErrorCode    ierr;
539   PetscInt          lda,len,i,n,m = A->rmap->n,radd;
540   PetscScalar       *x,zero = 0.0;
541   const PetscScalar *av;
542 
543   PetscFunctionBegin;
544   ierr = VecSet(v,zero);CHKERRQ(ierr);
545   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
546   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
547   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
548   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
549   radd = A->rmap->rstart*m;
550   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
551   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
552   for (i=0; i<len; i++) {
553     x[i] = av[radd + i*lda + i];
554   }
555   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
556   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
557   PetscFunctionReturn(0);
558 }
559 
/* Destroy a parallel dense matrix: tear down the stash, the inner sequential
   matrix and communication objects, then remove every composed method so the
   object can be retyped cleanly. The composed-name strings must match those
   registered at creation exactly. */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  /* refuse to destroy while a column-vector or submatrix view is outstanding */
  if (mdn->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (mdn->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  /* release the inner sequential matrix and the MatMult communication machinery */
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr);
  /* cached column vector / submatrix used by the Get/RestoreColumnVec interface */
  ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  /* remove all composed methods registered by MatCreate_MPIDense */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
#if defined (PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
617 
618 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
619 
620 #include <petscdraw.h>
/* View a parallel dense matrix for ASCII, draw or socket viewers.
   Info formats are handled per-rank; otherwise the whole matrix is gathered
   onto rank 0 and viewed as a sequential dense matrix. */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    /* NOTE(review): vtype is queried but never used below — dead code */
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      /* synchronized printing: one line of local statistics per rank, in rank order */
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      /* nothing beyond the generic header for plain INFO format */
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    /* rank 0 owns all rows of the gathered copy; other ranks hold nothing */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    /* copy row by row; off-rank entries are routed to rank 0 via the stash at assembly */
    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* only rank 0 views its (complete) local block through a sub-viewer */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
703 
704 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
705 {
706   PetscErrorCode ierr;
707   PetscBool      iascii,isbinary,isdraw,issocket;
708 
709   PetscFunctionBegin;
710   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
711   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
712   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
713   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
714 
715   if (iascii || issocket || isdraw) {
716     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
717   } else if (isbinary) {
718     ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
719   }
720   PetscFunctionReturn(0);
721 }
722 
723 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
724 {
725   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
726   Mat            mdn  = mat->A;
727   PetscErrorCode ierr;
728   PetscLogDouble isend[5],irecv[5];
729 
730   PetscFunctionBegin;
731   info->block_size = 1.0;
732 
733   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
734 
735   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
736   isend[3] = info->memory;  isend[4] = info->mallocs;
737   if (flag == MAT_LOCAL) {
738     info->nz_used      = isend[0];
739     info->nz_allocated = isend[1];
740     info->nz_unneeded  = isend[2];
741     info->memory       = isend[3];
742     info->mallocs      = isend[4];
743   } else if (flag == MAT_GLOBAL_MAX) {
744     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
745 
746     info->nz_used      = irecv[0];
747     info->nz_allocated = irecv[1];
748     info->nz_unneeded  = irecv[2];
749     info->memory       = irecv[3];
750     info->mallocs      = irecv[4];
751   } else if (flag == MAT_GLOBAL_SUM) {
752     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
753 
754     info->nz_used      = irecv[0];
755     info->nz_allocated = irecv[1];
756     info->nz_unneeded  = irecv[2];
757     info->memory       = irecv[3];
758     info->mallocs      = irecv[4];
759   }
760   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
761   info->fill_ratio_needed = 0;
762   info->factor_mallocs    = 0;
763   PetscFunctionReturn(0);
764 }
765 
766 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
767 {
768   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
769   PetscErrorCode ierr;
770 
771   PetscFunctionBegin;
772   switch (op) {
773   case MAT_NEW_NONZERO_LOCATIONS:
774   case MAT_NEW_NONZERO_LOCATION_ERR:
775   case MAT_NEW_NONZERO_ALLOCATION_ERR:
776     MatCheckPreallocated(A,1);
777     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
778     break;
779   case MAT_ROW_ORIENTED:
780     MatCheckPreallocated(A,1);
781     a->roworiented = flg;
782     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
783     break;
784   case MAT_NEW_DIAGONALS:
785   case MAT_KEEP_NONZERO_PATTERN:
786   case MAT_USE_HASH_TABLE:
787   case MAT_SORTED_FULL:
788     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
789     break;
790   case MAT_IGNORE_OFF_PROC_ENTRIES:
791     a->donotstash = flg;
792     break;
793   case MAT_SYMMETRIC:
794   case MAT_STRUCTURALLY_SYMMETRIC:
795   case MAT_HERMITIAN:
796   case MAT_SYMMETRY_ETERNAL:
797   case MAT_IGNORE_LOWER_TRIANGULAR:
798   case MAT_IGNORE_ZERO_ENTRIES:
799     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
800     break;
801   default:
802     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
803   }
804   PetscFunctionReturn(0);
805 }
806 
807 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
808 {
809   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
810   const PetscScalar *l;
811   PetscScalar       x,*v,*vv,*r;
812   PetscErrorCode    ierr;
813   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;
814 
815   PetscFunctionBegin;
816   ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
817   ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
818   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
819   if (ll) {
820     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
821     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2);
822     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
823     for (i=0; i<m; i++) {
824       x = l[i];
825       v = vv + i;
826       for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
827     }
828     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
829     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
830   }
831   if (rr) {
832     const PetscScalar *ar;
833 
834     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
835     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
836     ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
837     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
838     ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
839     ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
840     ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr);
841     for (i=0; i<n; i++) {
842       x = r[i];
843       v = vv + i*lda;
844       for (j=0; j<m; j++) (*v++) *= x;
845     }
846     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
847     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
848   }
849   ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr);
850   PetscFunctionReturn(0);
851 }
852 
/*
   MatNorm_MPIDense - Frobenius, 1- and infinity-norms of a row-distributed
   dense matrix; the 2-norm is not supported.
*/
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,j;
  PetscMPIInt       size;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr);
  v    = av;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  if (size == 1) {
    /* single process: the local block is the whole matrix */
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over the local block, reduce, take the square root */
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      /* per-column absolute sums of the local rows, summed across processes;
         the norm is the largest global column sum */
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = av;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are never split across processes, so the local row norms can
         simply be reduced with MAX */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
901 
/*
   MatTranspose_MPIDense - explicit transpose, built by scattering each local
   column of A into a row of the result with MatSetValues().  Supports
   MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX and MAT_INPLACE_MATRIX.
*/
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i,lda;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* create a fresh target with swapped (local and global) dimensions */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else B = *matout;

  m    = a->A->rmap->n; n = a->A->cmap->n;
  ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  /* rwork[] holds the global indices of the locally owned rows */
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    /* column j of the local block becomes part of row j of B */
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += lda;
  }
  ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: move B's innards into A and destroy the shell */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
939 
940 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
941 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
942 
943 PetscErrorCode MatSetUp_MPIDense(Mat A)
944 {
945   PetscErrorCode ierr;
946 
947   PetscFunctionBegin;
948   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
949   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
950   if (!A->preallocated) {
951     ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
952   }
953   PetscFunctionReturn(0);
954 }
955 
956 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
957 {
958   PetscErrorCode ierr;
959   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
960 
961   PetscFunctionBegin;
962   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
963   PetscFunctionReturn(0);
964 }
965 
966 PetscErrorCode MatConjugate_MPIDense(Mat mat)
967 {
968   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
969   PetscErrorCode ierr;
970 
971   PetscFunctionBegin;
972   ierr = MatConjugate(a->A);CHKERRQ(ierr);
973   PetscFunctionReturn(0);
974 }
975 
976 PetscErrorCode MatRealPart_MPIDense(Mat A)
977 {
978   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
979   PetscErrorCode ierr;
980 
981   PetscFunctionBegin;
982   ierr = MatRealPart(a->A);CHKERRQ(ierr);
983   PetscFunctionReturn(0);
984 }
985 
986 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
987 {
988   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
989   PetscErrorCode ierr;
990 
991   PetscFunctionBegin;
992   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
993   PetscFunctionReturn(0);
994 }
995 
996 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
997 {
998   PetscErrorCode ierr;
999   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1000 
1001   PetscFunctionBegin;
1002   if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
1003   if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
1004   ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
1005   PetscFunctionReturn(0);
1006 }
1007 
1008 PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1009 
1010 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1011 {
1012   PetscErrorCode ierr;
1013   PetscInt       i,n;
1014   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1015   PetscReal      *work;
1016 
1017   PetscFunctionBegin;
1018   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1019   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1020   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1021   if (type == NORM_2) {
1022     for (i=0; i<n; i++) work[i] *= work[i];
1023   }
1024   if (type == NORM_INFINITY) {
1025     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1026   } else {
1027     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1028   }
1029   ierr = PetscFree(work);CHKERRQ(ierr);
1030   if (type == NORM_2) {
1031     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1032   }
1033   PetscFunctionReturn(0);
1034 }
1035 
1036 #if defined(PETSC_HAVE_CUDA)
/* Check out column col of the matrix as a CUDA vector that aliases the device
   storage (no copy).  Only one column (or submatrix) may be checked out at a
   time; the caller must restore it before the next access. */
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the aliasing vector with no array attached yet */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* col+1 so that column 0 still reads as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1057 
1058 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1059 {
1060   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1061   PetscErrorCode ierr;
1062 
1063   PetscFunctionBegin;
1064   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1065   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1066   a->vecinuse = 0;
1067   ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1068   ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
1069   *v   = NULL;
1070   PetscFunctionReturn(0);
1071 }
1072 
/* Read-only variant of the column checkout: same aliasing scheme, but the
   device array is taken read-only and the vector is read-locked. */
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the aliasing vector with no array attached yet */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* col+1 so that column 0 still reads as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1094 
/* Return a column checked out with MatDenseGetColumnVecRead_MPIDenseCUDA():
   release the read-only device array, pop the read lock, detach the alias. */
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1110 
/* Write-only variant of the column checkout: the device array is taken
   write-only (existing values need not be valid on the device). */
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the aliasing vector with no array attached yet */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* col+1 so that column 0 still reads as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1131 
/* Return a column checked out with MatDenseGetColumnVecWrite_MPIDenseCUDA():
   release the write-only device array and detach it from the alias vector. */
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1146 
1147 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1148 {
1149   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1150   PetscErrorCode ierr;
1151 
1152   PetscFunctionBegin;
1153   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1154   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1155   ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
1156   PetscFunctionReturn(0);
1157 }
1158 
1159 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1160 {
1161   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1162   PetscErrorCode ierr;
1163 
1164   PetscFunctionBegin;
1165   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1166   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1167   ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
1168   PetscFunctionReturn(0);
1169 }
1170 
1171 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1172 {
1173   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1174   PetscErrorCode ierr;
1175 
1176   PetscFunctionBegin;
1177   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1178   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1179   ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr);
1180   PetscFunctionReturn(0);
1181 }
1182 
1183 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1184 {
1185   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1186   PetscErrorCode ierr;
1187 
1188   PetscFunctionBegin;
1189   ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
1190   PetscFunctionReturn(0);
1191 }
1192 
1193 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1194 {
1195   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1196   PetscErrorCode ierr;
1197 
1198   PetscFunctionBegin;
1199   ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
1200   PetscFunctionReturn(0);
1201 }
1202 
1203 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1204 {
1205   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1206   PetscErrorCode ierr;
1207 
1208   PetscFunctionBegin;
1209   ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1214 {
1215   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1216   PetscErrorCode ierr;
1217 
1218   PetscFunctionBegin;
1219   ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
1220   PetscFunctionReturn(0);
1221 }
1222 
1223 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1224 {
1225   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1226   PetscErrorCode ierr;
1227 
1228   PetscFunctionBegin;
1229   ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
1230   PetscFunctionReturn(0);
1231 }
1232 
1233 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1234 {
1235   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1236   PetscErrorCode ierr;
1237 
1238   PetscFunctionBegin;
1239   ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
1240   PetscFunctionReturn(0);
1241 }
1242 
1243 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1244 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1245 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
1246 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1247 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1248 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
1249 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);
1250 
/* Toggle between host and device execution: propagate the flag to the inner
   objects and swap the column-vector accessor methods between the host and
   the CUDA implementations. */
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (d->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (d->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* moving to the GPU: drop cached work vector/matrix that are not already
       CUDA types so they get recreated with the right type on next use */
    ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
    }
    ierr = PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = MatDestroy(&d->cmat);CHKERRQ(ierr);
    }
    /* install the CUDA column-vector accessors */
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
  } else {
    /* install the host column-vector accessors */
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  }
  if (d->cmat) {
    ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1293 
/* Allocate (or attach d_data as) the device storage of a MATMPIDENSECUDA
   matrix; quietly does nothing for any other matrix type.  If d_data is
   NULL, fresh device memory is allocated by the sequential kernel. */
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  if (!iscuda) PetscFunctionReturn(0);
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!d->A) {
    /* lazily create the local sequential block (local rows x all N columns) */
    ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
    ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
  }
  ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
  ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1316 #endif
1317 
1318 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1319 {
1320   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1321   PetscErrorCode ierr;
1322 
1323   PetscFunctionBegin;
1324   ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
1325   PetscFunctionReturn(0);
1326 }
1327 
/* A dense matrix stores every entry explicitly, so a diagonal entry can
   never be structurally missing; *missing is always PETSC_FALSE and the
   optional index output d is intentionally left untouched. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1334 
1335 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1336 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1337 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1338 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1339 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
1340 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);
1341 
1342 /* -------------------------------------------------------------------*/
1343 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1344                                         MatGetRow_MPIDense,
1345                                         MatRestoreRow_MPIDense,
1346                                         MatMult_MPIDense,
1347                                 /*  4*/ MatMultAdd_MPIDense,
1348                                         MatMultTranspose_MPIDense,
1349                                         MatMultTransposeAdd_MPIDense,
1350                                         0,
1351                                         0,
1352                                         0,
1353                                 /* 10*/ 0,
1354                                         0,
1355                                         0,
1356                                         0,
1357                                         MatTranspose_MPIDense,
1358                                 /* 15*/ MatGetInfo_MPIDense,
1359                                         MatEqual_MPIDense,
1360                                         MatGetDiagonal_MPIDense,
1361                                         MatDiagonalScale_MPIDense,
1362                                         MatNorm_MPIDense,
1363                                 /* 20*/ MatAssemblyBegin_MPIDense,
1364                                         MatAssemblyEnd_MPIDense,
1365                                         MatSetOption_MPIDense,
1366                                         MatZeroEntries_MPIDense,
1367                                 /* 24*/ MatZeroRows_MPIDense,
1368                                         0,
1369                                         0,
1370                                         0,
1371                                         0,
1372                                 /* 29*/ MatSetUp_MPIDense,
1373                                         0,
1374                                         0,
1375                                         MatGetDiagonalBlock_MPIDense,
1376                                         0,
1377                                 /* 34*/ MatDuplicate_MPIDense,
1378                                         0,
1379                                         0,
1380                                         0,
1381                                         0,
1382                                 /* 39*/ MatAXPY_MPIDense,
1383                                         MatCreateSubMatrices_MPIDense,
1384                                         0,
1385                                         MatGetValues_MPIDense,
1386                                         0,
1387                                 /* 44*/ 0,
1388                                         MatScale_MPIDense,
1389                                         MatShift_Basic,
1390                                         0,
1391                                         0,
1392                                 /* 49*/ MatSetRandom_MPIDense,
1393                                         0,
1394                                         0,
1395                                         0,
1396                                         0,
1397                                 /* 54*/ 0,
1398                                         0,
1399                                         0,
1400                                         0,
1401                                         0,
1402                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1403                                         MatDestroy_MPIDense,
1404                                         MatView_MPIDense,
1405                                         0,
1406                                         0,
1407                                 /* 64*/ 0,
1408                                         0,
1409                                         0,
1410                                         0,
1411                                         0,
1412                                 /* 69*/ 0,
1413                                         0,
1414                                         0,
1415                                         0,
1416                                         0,
1417                                 /* 74*/ 0,
1418                                         0,
1419                                         0,
1420                                         0,
1421                                         0,
1422                                 /* 79*/ 0,
1423                                         0,
1424                                         0,
1425                                         0,
1426                                 /* 83*/ MatLoad_MPIDense,
1427                                         0,
1428                                         0,
1429                                         0,
1430                                         0,
1431                                         0,
1432                                 /* 89*/ 0,
1433                                         0,
1434                                         0,
1435                                         0,
1436                                         0,
1437                                 /* 94*/ 0,
1438                                         0,
1439                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1440                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1441                                         0,
1442                                 /* 99*/ MatProductSetFromOptions_MPIDense,
1443                                         0,
1444                                         0,
1445                                         MatConjugate_MPIDense,
1446                                         0,
1447                                 /*104*/ 0,
1448                                         MatRealPart_MPIDense,
1449                                         MatImaginaryPart_MPIDense,
1450                                         0,
1451                                         0,
1452                                 /*109*/ 0,
1453                                         0,
1454                                         0,
1455                                         MatGetColumnVector_MPIDense,
1456                                         MatMissingDiagonal_MPIDense,
1457                                 /*114*/ 0,
1458                                         0,
1459                                         0,
1460                                         0,
1461                                         0,
1462                                 /*119*/ 0,
1463                                         0,
1464                                         0,
1465                                         0,
1466                                         0,
1467                                 /*124*/ 0,
1468                                         MatGetColumnNorms_MPIDense,
1469                                         0,
1470                                         0,
1471                                         0,
1472                                 /*129*/ 0,
1473                                         0,
1474                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1475                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1476                                         0,
1477                                 /*134*/ 0,
1478                                         0,
1479                                         0,
1480                                         0,
1481                                         0,
1482                                 /*139*/ 0,
1483                                         0,
1484                                         0,
1485                                         0,
1486                                         0,
1487                                         MatCreateMPIMatConcatenateSeqMat_MPIDense,
1488                                 /*145*/ 0,
1489                                         0,
1490                                         0
1491 };
1492 
1493 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1494 {
1495   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1496   PetscBool      iscuda;
1497   PetscErrorCode ierr;
1498 
1499   PetscFunctionBegin;
1500   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1501   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1502   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1503   if (!a->A) {
1504     ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1505     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1506     ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1507   }
1508   ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
1509   ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
1510   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1511   mat->preallocated = PETSC_TRUE;
1512   PetscFunctionReturn(0);
1513 }
1514 
1515 #if defined(PETSC_HAVE_ELEMENTAL)
1516 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1517 {
1518   Mat            mat_elemental;
1519   PetscErrorCode ierr;
1520   PetscScalar    *v;
1521   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1522 
1523   PetscFunctionBegin;
1524   if (reuse == MAT_REUSE_MATRIX) {
1525     mat_elemental = *newmat;
1526     ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
1527   } else {
1528     ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
1529     ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1530     ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
1531     ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
1532     ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1533   }
1534 
1535   ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
1536   for (i=0; i<N; i++) cols[i] = i;
1537   for (i=0; i<m; i++) rows[i] = rstart + i;
1538 
1539   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1540   ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
1541   ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
1542   ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1543   ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1544   ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
1545   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1546 
1547   if (reuse == MAT_INPLACE_MATRIX) {
1548     ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
1549   } else {
1550     *newmat = mat_elemental;
1551   }
1552   PetscFunctionReturn(0);
1553 }
1554 #endif
1555 
1556 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1557 {
1558   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1559   PetscErrorCode ierr;
1560 
1561   PetscFunctionBegin;
1562   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1563   PetscFunctionReturn(0);
1564 }
1565 
1566 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1567 {
1568   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1569   PetscErrorCode ierr;
1570 
1571   PetscFunctionBegin;
1572   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1573   PetscFunctionReturn(0);
1574 }
1575 
1576 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1577 {
1578   PetscErrorCode ierr;
1579   Mat_MPIDense   *mat;
1580   PetscInt       m,nloc,N;
1581 
1582   PetscFunctionBegin;
1583   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
1584   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
1585   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1586     PetscInt sum;
1587 
1588     if (n == PETSC_DECIDE) {
1589       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1590     }
1591     /* Check sum(n) = N */
1592     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1593     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1594 
1595     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
1596   }
1597 
1598   /* numeric phase */
1599   mat = (Mat_MPIDense*)(*outmat)->data;
1600   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1601   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1602   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1603   PetscFunctionReturn(0);
1604 }
1605 
1606 #if defined(PETSC_HAVE_CUDA)
/*
   Converts a MATMPIDENSECUDA matrix to plain MATMPIDENSE: duplicates/copies the
   data as requested, removes every CUDA-specific method composed on the object,
   converts the inner sequential block back to MATSEQDENSE, and binds to the CPU.
   For MAT_INPLACE_MATRIX, *newmat already aliases M, so no copy is made.
*/
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  B    = *newmat;
  /* move data to host before dismantling the CUDA machinery */
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
  /* strip the CUDA-specific composed methods installed by the reverse conversion */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* the inner sequential block follows the outer type back to host */
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}
1648 
/*
   Converts a MATMPIDENSE matrix to MATMPIDENSECUDA: duplicates/copies the data
   as requested, installs the CUDA-specific composed methods, converts the inner
   sequential block to MATSEQDENSECUDA, and unbinds the matrix from the CPU.
   For MAT_INPLACE_MATRIX, *newmat already aliases M, so no copy is made.
*/
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  B    = *newmat;
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  /* install the CUDA-specific composed methods (removed again by the reverse conversion) */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* inner block converted in place keeps valid data on both host and device */
    ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    /* no inner block yet: nothing has been allocated on either side */
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
1693 #endif
1694 
1695 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1696 {
1697   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1698   PetscErrorCode ierr;
1699   PetscInt       lda;
1700 
1701   PetscFunctionBegin;
1702   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1703   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1704   if (!a->cvec) {
1705     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1706   }
1707   a->vecinuse = col + 1;
1708   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1709   ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1710   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1711   *v   = a->cvec;
1712   PetscFunctionReturn(0);
1713 }
1714 
1715 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1716 {
1717   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1718   PetscErrorCode ierr;
1719 
1720   PetscFunctionBegin;
1721   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1722   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1723   a->vecinuse = 0;
1724   ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1725   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1726   *v   = NULL;
1727   PetscFunctionReturn(0);
1728 }
1729 
1730 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1731 {
1732   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1733   PetscErrorCode ierr;
1734   PetscInt       lda;
1735 
1736   PetscFunctionBegin;
1737   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1738   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1739   if (!a->cvec) {
1740     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1741   }
1742   a->vecinuse = col + 1;
1743   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1744   ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1745   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1746   ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
1747   *v   = a->cvec;
1748   PetscFunctionReturn(0);
1749 }
1750 
1751 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1752 {
1753   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1754   PetscErrorCode ierr;
1755 
1756   PetscFunctionBegin;
1757   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1758   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1759   a->vecinuse = 0;
1760   ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1761   ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
1762   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1763   *v   = NULL;
1764   PetscFunctionReturn(0);
1765 }
1766 
1767 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1768 {
1769   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1770   PetscErrorCode ierr;
1771   PetscInt       lda;
1772 
1773   PetscFunctionBegin;
1774   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1775   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1776   if (!a->cvec) {
1777     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1778   }
1779   a->vecinuse = col + 1;
1780   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1781   ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1782   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1783   *v   = a->cvec;
1784   PetscFunctionReturn(0);
1785 }
1786 
1787 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1788 {
1789   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1790   PetscErrorCode ierr;
1791 
1792   PetscFunctionBegin;
1793   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1794   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1795   a->vecinuse = 0;
1796   ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1797   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1798   *v   = NULL;
1799   PetscFunctionReturn(0);
1800 }
1801 
/*
   Hands out a view of columns [cbegin,cend) of A as a matrix of the same type.
   A single cached view matrix a->cmat is reused across calls; its column layout
   is rebuilt when the requested width changes. Pair with
   MatDenseRestoreSubMatrix_MPIDense().
*/
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscBool      setup = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (a->vecinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cmat) {
    /* first use: create the cached view, sharing A's row layout */
    setup = PETSC_TRUE;
    ierr = MatCreate(comm,&a->cmat);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr);
    ierr = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  } else if (cend-cbegin != a->cmat->cmap->N) {
    /* width changed since last use: rebuild only the column layout */
    setup = PETSC_TRUE;
    ierr = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  }
  c = (Mat_MPIDense*)a->cmat->data;
  /* a non-NULL inner block means a previous view was never restored */
  if (c->A) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr);
  if (setup) { /* do we really need this? */
    ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr);
  }
  /* mark the view usable without going through MatAssembly */
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled = PETSC_TRUE;
  a->matinuse = cbegin + 1; /* nonzero marks the view as checked out */
  *v = a->cmat;
  PetscFunctionReturn(0);
}
1841 
1842 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
1843 {
1844   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1845   Mat_MPIDense   *c;
1846   PetscErrorCode ierr;
1847 
1848   PetscFunctionBegin;
1849   if (!a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
1850   if (!a->cmat) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
1851   if (*v != a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
1852   a->matinuse = 0;
1853   c    = (Mat_MPIDense*)a->cmat->data;
1854   ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr);
1855   *v   = NULL;
1856   PetscFunctionReturn(0);
1857 }
1858 
/*
   Constructor for MATMPIDENSE: allocates the type-specific data, installs the
   operations table, and composes the dense-specific methods on the object.
   The inner sequential matrix is created later by MatMPIDenseSetPreallocation().
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  /* dense-specific accessors: raw array, LDA, column-vector and submatrix views */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr);
  /* optional conversions, compiled in only when the target package is available */
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  /* matrix-product dispatch with (CUDA) AIJ partners */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#endif

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1922 
1923 /*MC
1924    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1925 
1926    Options Database Keys:
1927 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1928 
1929   Level: beginner
1930 
.seealso: MATMPIDENSE, MATSEQDENSECUDA, MATDENSECUDA
1932 
1933 M*/
1934 #if defined(PETSC_HAVE_CUDA)
1935 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
1936 {
1937   PetscErrorCode ierr;
1938 
1939   PetscFunctionBegin;
1940   ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
1941   ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
1942   PetscFunctionReturn(0);
1943 }
1944 #endif
1945 
1946 /*MC
1947    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1948 
1949    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1950    and MATMPIDENSE otherwise.
1951 
1952    Options Database Keys:
1953 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1954 
1955   Level: beginner
1956 
1957 
1958 .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
1959 M*/
1960 
1961 /*MC
1962    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
1963 
1964    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
1965    and MATMPIDENSECUDA otherwise.
1966 
1967    Options Database Keys:
1968 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
1969 
1970   Level: beginner
1971 
1972 .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
1973 M*/
1974 
1975 /*@C
1976    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1977 
1978    Collective
1979 
1980    Input Parameters:
+  B - the matrix
1982 -  data - optional location of matrix data.  Set data=NULL for PETSc
1983    to control all matrix memory allocation.
1984 
1985    Notes:
1986    The dense format is fully compatible with standard Fortran 77
1987    storage by columns.
1988 
1989    The data input variable is intended primarily for Fortran programmers
1990    who wish to allocate their own matrix memory space.  Most users should
1991    set data=NULL.
1992 
1993    Level: intermediate
1994 
1995 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1996 @*/
1997 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1998 {
1999   PetscErrorCode ierr;
2000 
2001   PetscFunctionBegin;
2002   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2003   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
2004   PetscFunctionReturn(0);
2005 }
2006 
2007 /*@
2008    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
2009    array provided by the user. This is useful to avoid copying an array
2010    into a matrix
2011 
2012    Not Collective
2013 
2014    Input Parameters:
2015 +  mat - the matrix
2016 -  array - the array in column major order
2017 
2018    Notes:
2019    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
2020    freed when the matrix is destroyed.
2021 
2022    Level: developer
2023 
2024 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
2025 
2026 @*/
2027 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
2028 {
2029   PetscErrorCode ierr;
2030 
2031   PetscFunctionBegin;
2032   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2033   ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2034   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2035 #if defined(PETSC_HAVE_CUDA)
2036   mat->offloadmask = PETSC_OFFLOAD_CPU;
2037 #endif
2038   PetscFunctionReturn(0);
2039 }
2040 
2041 /*@
2042    MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()
2043 
2044    Not Collective
2045 
   Input Parameter:
2047 .  mat - the matrix
2048 
2049    Notes:
2050    You can only call this after a call to MatDensePlaceArray()
2051 
2052    Level: developer
2053 
2054 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
2055 
2056 @*/
2057 PetscErrorCode  MatDenseResetArray(Mat mat)
2058 {
2059   PetscErrorCode ierr;
2060 
2061   PetscFunctionBegin;
2062   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2063   ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
2064   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2065   PetscFunctionReturn(0);
2066 }
2067 
2068 /*@
2069    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2070    array provided by the user. This is useful to avoid copying an array
2071    into a matrix
2072 
2073    Not Collective
2074 
2075    Input Parameters:
2076 +  mat - the matrix
2077 -  array - the array in column major order
2078 
2079    Notes:
2080    The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
2081    freed by the user. It will be freed when the matrix is destroyed.
2082 
2083    Level: developer
2084 
2085 .seealso: MatDenseGetArray(), VecReplaceArray()
2086 @*/
2087 PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
2088 {
2089   PetscErrorCode ierr;
2090 
2091   PetscFunctionBegin;
2092   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2093   ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2094   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2095 #if defined(PETSC_HAVE_CUDA)
2096   mat->offloadmask = PETSC_OFFLOAD_CPU;
2097 #endif
2098   PetscFunctionReturn(0);
2099 }
2100 
2101 #if defined(PETSC_HAVE_CUDA)
2102 /*@C
2103    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
2104    array provided by the user. This is useful to avoid copying an array
2105    into a matrix
2106 
2107    Not Collective
2108 
2109    Input Parameters:
2110 +  mat - the matrix
2111 -  array - the array in column major order
2112 
2113    Notes:
2114    You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
2115    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
2116 
2117    Level: developer
2118 
2119 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
2120 @*/
2121 PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
2122 {
2123   PetscErrorCode ierr;
2124 
2125   PetscFunctionBegin;
2126   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2127   ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2128   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2129   mat->offloadmask = PETSC_OFFLOAD_GPU;
2130   PetscFunctionReturn(0);
2131 }
2132 
2133 /*@C
   MatDenseCUDAResetArray - Resets the matrix array to that which it previously had before the call to MatDenseCUDAPlaceArray()
2135 
2136    Not Collective
2137 
2138    Input Parameters:
2139 .  mat - the matrix
2140 
2141    Notes:
2142    You can only call this after a call to MatDenseCUDAPlaceArray()
2143 
2144    Level: developer
2145 
2146 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()
2147 
2148 @*/
2149 PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
2150 {
2151   PetscErrorCode ierr;
2152 
2153   PetscFunctionBegin;
2154   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2155   ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
2156   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2157   PetscFunctionReturn(0);
2158 }
2159 
2160 /*@C
2161    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
2162    array provided by the user. This is useful to avoid copying an array
2163    into a matrix
2164 
2165    Not Collective
2166 
2167    Input Parameters:
2168 +  mat - the matrix
2169 -  array - the array in column major order
2170 
2171    Notes:
2172    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2173    The memory passed in CANNOT be freed by the user. It will be freed
2174    when the matrix is destroyed. The array should respect the matrix leading dimension.
2175 
2176    Level: developer
2177 
2178 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
2179 @*/
2180 PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
2181 {
2182   PetscErrorCode ierr;
2183 
2184   PetscFunctionBegin;
2185   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2186   ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2187   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2188   mat->offloadmask = PETSC_OFFLOAD_GPU;
2189   PetscFunctionReturn(0);
2190 }
2191 
2192 /*@C
2193    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.
2194 
2195    Not Collective
2196 
2197    Input Parameters:
2198 .  A - the matrix
2199 
   Output Parameter:
2201 .  array - the GPU array in column major order
2202 
2203    Notes:
2204    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
2205 
2206    Level: developer
2207 
2208 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
2209 @*/
2210 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
2211 {
2212   PetscErrorCode ierr;
2213 
2214   PetscFunctionBegin;
2215   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2216   ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2217   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2218   PetscFunctionReturn(0);
2219 }
2220 
2221 /*@C
2222    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().
2223 
2224    Not Collective
2225 
2226    Input Parameters:
2227 +  A - the matrix
2228 -  array - the GPU array in column major order
2229 
2230    Notes:
2231 
2232    Level: developer
2233 
2234 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2235 @*/
2236 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
2237 {
2238   PetscErrorCode ierr;
2239 
2240   PetscFunctionBegin;
2241   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2242   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2243   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2244   A->offloadmask = PETSC_OFFLOAD_GPU;
2245   PetscFunctionReturn(0);
2246 }
2247 
2248 /*@C
2249    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.
2250 
2251    Not Collective
2252 
2253    Input Parameters:
2254 .  A - the matrix
2255 
   Output Parameter:
2257 .  array - the GPU array in column major order
2258 
2259    Notes:
2260    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2261 
2262    Level: developer
2263 
2264 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2265 @*/
2266 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
2267 {
2268   PetscErrorCode ierr;
2269 
2270   PetscFunctionBegin;
2271   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2272   ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2273   PetscFunctionReturn(0);
2274 }
2275 
2276 /*@C
2277    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2278 
2279    Not Collective
2280 
2281    Input Parameters:
2282 +  A - the matrix
2283 -  array - the GPU array in column major order
2284 
2285    Notes:
2286    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2287 
2288    Level: developer
2289 
2290 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
2291 @*/
2292 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2293 {
2294   PetscErrorCode ierr;
2295 
2296   PetscFunctionBegin;
2297   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2298   PetscFunctionReturn(0);
2299 }
2300 
2301 /*@C
2302    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.
2303 
2304    Not Collective
2305 
2306    Input Parameters:
2307 .  A - the matrix
2308 
   Output Parameter:
2310 .  array - the GPU array in column major order
2311 
2312    Notes:
2313    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().
2314 
2315    Level: developer
2316 
2317 .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2318 @*/
2319 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
2320 {
2321   PetscErrorCode ierr;
2322 
2323   PetscFunctionBegin;
2324   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2325   ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2326   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2327   PetscFunctionReturn(0);
2328 }
2329 
2330 /*@C
2331    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().
2332 
2333    Not Collective
2334 
2335    Input Parameters:
2336 +  A - the matrix
2337 -  array - the GPU array in column major order
2338 
2339    Notes:
2340 
2341    Level: developer
2342 
2343 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2344 @*/
2345 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
2346 {
2347   PetscErrorCode ierr;
2348 
2349   PetscFunctionBegin;
2350   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2351   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2352   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2353   A->offloadmask = PETSC_OFFLOAD_GPU;
2354   PetscFunctionReturn(0);
2355 }
2356 #endif
2357 
2358 /*@C
2359    MatCreateDense - Creates a matrix in dense format.
2360 
2361    Collective
2362 
2363    Input Parameters:
2364 +  comm - MPI communicator
2365 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2366 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2367 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2368 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2369 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2370    to control all matrix memory allocation.
2371 
2372    Output Parameter:
2373 .  A - the matrix
2374 
2375    Notes:
2376    The dense format is fully compatible with standard Fortran 77
2377    storage by columns.
2378 
2379    The data input variable is intended primarily for Fortran programmers
2380    who wish to allocate their own matrix memory space.  Most users should
2381    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
2382 
2383    The user MUST specify either the local or global matrix dimensions
2384    (possibly both).
2385 
2386    Level: intermediate
2387 
2388 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
2389 @*/
/* Create a dense matrix, choosing MATMPIDENSE or MATSEQDENSE from the communicator size. */
PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    /* ranks may independently pass NULL or a data pointer */
    PetscBool havedata = (PetscBool)!!data;

    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    /* reduce with logical OR so every rank agrees on whether any data was supplied;
       marking the matrix assembled is a collective decision */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr);
    if (havedata) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    /* single process: use the sequential dense type directly */
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2415 
2416 #if defined(PETSC_HAVE_CUDA)
2417 /*@C
2418    MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.
2419 
2420    Collective
2421 
2422    Input Parameters:
2423 +  comm - MPI communicator
2424 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2425 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2426 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2427 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2428 -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
2429    to control matrix memory allocation.
2430 
2431    Output Parameter:
2432 .  A - the matrix
2433 
2434    Notes:
2435 
2436    Level: intermediate
2437 
2438 .seealso: MatCreate(), MatCreateDense()
2439 @*/
/* Create a CUDA dense matrix, choosing MATMPIDENSECUDA or MATSEQDENSECUDA from the communicator size. */
PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  /* unlike MatCreateDense(), data must be collectively NULL or collectively non-NULL;
     this check enforces that instead of an allreduce */
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
    ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    /* single process: use the sequential CUDA dense type directly */
    ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
    ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2463 #endif
2464 
/* Duplicate an MPI dense matrix: same layout and type; values copied per cpvalues. */
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIDense*)mat->data;

  /* the duplicate is immediately usable: no assembly phase is required */
  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* share the parallel layouts of A rather than rebuilding them */
  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  /* duplicate the local (sequential) block; cpvalues controls value copying */
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}
2495 
/* Load an MPI dense matrix from a viewer; supports binary and (optionally) HDF5 viewers. */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}
2522 
2523 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
2524 {
2525   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2526   Mat            a,b;
2527   PetscBool      flg;
2528   PetscErrorCode ierr;
2529 
2530   PetscFunctionBegin;
2531   a    = matA->A;
2532   b    = matB->A;
2533   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2534   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2535   PetscFunctionReturn(0);
2536 }
2537 
2538 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2539 {
2540   PetscErrorCode        ierr;
2541   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2542 
2543   PetscFunctionBegin;
2544   ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
2545   ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
2546   ierr = PetscFree(atb);CHKERRQ(ierr);
2547   PetscFunctionReturn(0);
2548 }
2549 
2550 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2551 {
2552   PetscErrorCode        ierr;
2553   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2554 
2555   PetscFunctionBegin;
2556   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
2557   ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
2558   ierr = PetscFree(abt);CHKERRQ(ierr);
2559   PetscFunctionReturn(0);
2560 }
2561 
/* Numeric phase of C = A^T * B for MPI dense matrices: form the local product
   A_loc^T * B_loc, then sum the contributions across ranks with a reduce-scatter
   so each rank receives its owned rows of C. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts;
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  atb = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf = atb->sendbuf;

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);

  /* arrange atbarray into sendbuf: per destination rank, its row slice of every
     column, so the reduce-scatter delivers contiguous column-major local blocks */
  ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);

  /* sum all atbarray to local values of C */
  ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2606 
2607 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2608 {
2609   PetscErrorCode        ierr;
2610   MPI_Comm              comm;
2611   PetscMPIInt           size;
2612   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
2613   Mat_TransMatMultDense *atb;
2614   PetscBool             cisdense;
2615   PetscInt              i;
2616   const PetscInt        *ranges;
2617 
2618   PetscFunctionBegin;
2619   MatCheckProduct(C,3);
2620   if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2621   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2622   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2623     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2624   }
2625 
2626   /* create matrix product C */
2627   ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr);
2628   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
2629   if (!cisdense) {
2630     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
2631   }
2632   ierr = MatSetUp(C);CHKERRQ(ierr);
2633 
2634   /* create data structure for reuse C */
2635   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2636   ierr = PetscNew(&atb);CHKERRQ(ierr);
2637   cM   = C->rmap->N;
2638   ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);
2639   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
2640   for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
2641 
2642   C->product->data    = atb;
2643   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2644   PetscFunctionReturn(0);
2645 }
2646 
/* Symbolic phase of C = A * B^T for MPI dense matrices: set up C and allocate
   the buffers for the chosen communication algorithm ("allgatherv" or "cyclic"). */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  PetscMPIInt           tag;
  PetscInt              alg;            /* 0 = allgatherv, 1 = cyclic */
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A and B */
  if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);

  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  alg  = flg ? 0 : 1;

  /* setup matrix product C */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  /* a dedicated tag keeps the cyclic point-to-point traffic of this product separate */
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    /* double buffers sized for the largest row block of B on any rank */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" */
    /* buf[0] holds the local transposed block of B, buf[1] the fully gathered B^T */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data    = abt;
  C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2699 
/* Numeric phase of C = A * B^T using a cyclic pipeline: each rank's row block of B
   travels around the ring of processes; at every step the local A block is multiplied
   against the block currently held, overlapping communication with the GEMM. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=0, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  = (Mat_MatTransMultDense*)C->product->data;
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  if (blda == bn) {
    /* B's local block is contiguous: send it in place without copying */
    sendbuf = (PetscScalar*)bv;
  } else {
    /* pack B's local block densely (strip the leading-dimension padding) */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  if (size > 1) {
    /* ring neighbors: send "down", receive from "up" */
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      /* alternate the two buffers so the block being computed on is never overwritten */
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
      ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
    }

    /* local aseq * sendbuf^T */
    ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2785 
2786 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2787 {
2788   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2789   Mat_MatTransMultDense *abt;
2790   PetscErrorCode        ierr;
2791   MPI_Comm              comm;
2792   PetscMPIInt           size;
2793   PetscScalar           *cv, *sendbuf, *recvbuf;
2794   const PetscScalar     *av,*bv;
2795   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2796   PetscScalar           _DOne=1.0,_DZero=0.0;
2797   PetscBLASInt          cm, cn, ck, alda, clda;
2798 
2799   PetscFunctionBegin;
2800   MatCheckProduct(C,3);
2801   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2802   abt  = (Mat_MatTransMultDense*)C->product->data;
2803   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2804   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2805   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
2806   ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
2807   ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
2808   ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
2809   ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
2810   ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr);
2811   ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
2812   ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
2813   /* copy transpose of B into buf[0] */
2814   bn      = B->rmap->n;
2815   sendbuf = abt->buf[0];
2816   recvbuf = abt->buf[1];
2817   for (k = 0, j = 0; j < bn; j++) {
2818     for (i = 0; i < cK; i++, k++) {
2819       sendbuf[k] = bv[i * blda + j];
2820     }
2821   }
2822   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2823   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
2824   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2825   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2826   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
2827   if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2828   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
2829   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2830   ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
2831   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2832   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2833   PetscFunctionReturn(0);
2834 }
2835 
2836 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2837 {
2838   Mat_MatTransMultDense *abt;
2839   PetscErrorCode        ierr;
2840 
2841   PetscFunctionBegin;
2842   MatCheckProduct(C,3);
2843   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2844   abt = (Mat_MatTransMultDense*)C->product->data;
2845   switch (abt->alg) {
2846   case 1:
2847     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2848     break;
2849   default:
2850     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2851     break;
2852   }
2853   PetscFunctionReturn(0);
2854 }
2855 
2856 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2857 {
2858   PetscErrorCode   ierr;
2859   Mat_MatMultDense *ab = (Mat_MatMultDense*)data;
2860 
2861   PetscFunctionBegin;
2862   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2863   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2864   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2865   ierr = PetscFree(ab);CHKERRQ(ierr);
2866   PetscFunctionReturn(0);
2867 }
2868 
2869 #if defined(PETSC_HAVE_ELEMENTAL)
2870 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2871 {
2872   PetscErrorCode   ierr;
2873   Mat_MatMultDense *ab;
2874 
2875   PetscFunctionBegin;
2876   MatCheckProduct(C,3);
2877   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
2878   ab   = (Mat_MatMultDense*)C->product->data;
2879   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
2880   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
2881   ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
2882   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
2883   PetscFunctionReturn(0);
2884 }
2885 
2886 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2887 {
2888   PetscErrorCode   ierr;
2889   Mat              Ae,Be,Ce;
2890   Mat_MatMultDense *ab;
2891 
2892   PetscFunctionBegin;
2893   MatCheckProduct(C,4);
2894   if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2895   /* check local size of A and B */
2896   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2897     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2898   }
2899 
2900   /* create elemental matrices Ae and Be */
2901   ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr);
2902   ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2903   ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr);
2904   ierr = MatSetUp(Ae);CHKERRQ(ierr);
2905   ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2906 
2907   ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr);
2908   ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
2909   ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
2910   ierr = MatSetUp(Be);CHKERRQ(ierr);
2911   ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2912 
2913   /* compute symbolic Ce = Ae*Be */
2914   ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr);
2915   ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr);
2916 
2917   /* setup C */
2918   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2919   ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr);
2920   ierr = MatSetUp(C);CHKERRQ(ierr);
2921 
2922   /* create data structure for reuse Cdense */
2923   ierr = PetscNew(&ab);CHKERRQ(ierr);
2924   ab->Ae = Ae;
2925   ab->Be = Be;
2926   ab->Ce = Ce;
2927 
2928   C->product->data    = ab;
2929   C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
2930   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2931   PetscFunctionReturn(0);
2932 }
2933 #endif
2934 /* ----------------------------------------------- */
2935 #if defined(PETSC_HAVE_ELEMENTAL)
2936 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2937 {
2938   PetscFunctionBegin;
2939   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2940   C->ops->productsymbolic = MatProductSymbolic_AB;
2941   PetscFunctionReturn(0);
2942 }
2943 #endif
2944 
2945 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2946 {
2947   Mat_Product *product = C->product;
2948   Mat         A = product->A,B=product->B;
2949 
2950   PetscFunctionBegin;
2951   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2952     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2953   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2954   C->ops->productsymbolic = MatProductSymbolic_AtB;
2955   PetscFunctionReturn(0);
2956 }
2957 
2958 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2959 {
2960   PetscErrorCode ierr;
2961   Mat_Product    *product = C->product;
2962   const char     *algTypes[2] = {"allgatherv","cyclic"};
2963   PetscInt       alg,nalg = 2;
2964   PetscBool      flg = PETSC_FALSE;
2965 
2966   PetscFunctionBegin;
2967   /* Set default algorithm */
2968   alg = 0; /* default is allgatherv */
2969   ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
2970   if (flg) {
2971     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
2972   }
2973 
2974   /* Get runtime option */
2975   if (product->api_user) {
2976     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
2977     ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
2978     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2979   } else {
2980     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
2981     ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
2982     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2983   }
2984   if (flg) {
2985     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
2986   }
2987 
2988   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2989   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2990   PetscFunctionReturn(0);
2991 }
2992 
2993 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2994 {
2995   PetscErrorCode ierr;
2996   Mat_Product    *product = C->product;
2997 
2998   PetscFunctionBegin;
2999   switch (product->type) {
3000 #if defined(PETSC_HAVE_ELEMENTAL)
3001   case MATPRODUCT_AB:
3002     ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
3003     break;
3004 #endif
3005   case MATPRODUCT_AtB:
3006     ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
3007     break;
3008   case MATPRODUCT_ABt:
3009     ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
3010     break;
3011   default:
3012     break;
3013   }
3014   PetscFunctionReturn(0);
3015 }
3016