xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision e6aea9c08b2f330338e818e2f07e3eaaf927f247)
1 
2 /*
   Basic functions for parallel dense matrices.
4 */
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11 
12       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
13               matrix that represents the operator. For sequential matrices it returns itself.
14 
15     Input Parameter:
16 .      A - the Seq or MPI dense matrix
17 
18     Output Parameter:
19 .      B - the inner matrix
20 
21     Level: intermediate
22 
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25 {
26   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
27   PetscErrorCode ierr;
28   PetscBool      flg;
29 
30   PetscFunctionBegin;
31   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
32   PetscValidPointer(B,2);
33   ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
34   if (flg) *B = mat->A;
35   else {
36     ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg);CHKERRQ(ierr);
37     if (!flg) SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
38     *B = A;
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
44 {
45   Mat_MPIDense   *Amat = (Mat_MPIDense*)A->data;
46   Mat_MPIDense   *Bmat = (Mat_MPIDense*)B->data;
47   PetscErrorCode ierr;
48 
49   PetscFunctionBegin;
50   ierr = MatCopy(Amat->A,Bmat->A,s);CHKERRQ(ierr);
51   PetscFunctionReturn(0);
52 }
53 
54 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
55 {
56   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
57   PetscErrorCode ierr;
58   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
59 
60   PetscFunctionBegin;
61   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
62   lrow = row - rstart;
63   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
64   PetscFunctionReturn(0);
65 }
66 
67 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
68 {
69   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
70   PetscErrorCode ierr;
71   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
72 
73   PetscFunctionBegin;
74   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
75   lrow = row - rstart;
76   ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
77   PetscFunctionReturn(0);
78 }
79 
/*
   MatGetDiagonalBlock_MPIDense - Returns the on-process diagonal block of A as a
   sequential dense matrix that shares storage with the local block of A.

   The block is created once, cached on A under the key "DiagonalBlock", and the
   same (borrowed) handle is returned on subsequent calls; callers must not
   destroy it.
*/
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  /* check the cache first; only build the block on the first call */
  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr);
    if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    /* B aliases A's storage starting at local column rstart.
       NOTE(review): the offset m*rstart assumes the local leading dimension
       equals m (no MatDenseSetLDA on the inner matrix) — confirm */
    ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* the composition holds a reference, so the local handle can be released;
       the caller receives a borrowed reference */
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
113 
/*
   MatSetValues_MPIDense - Inserts or adds a block of values into the parallel
   dense matrix.

   Locally owned rows are forwarded directly to MatSetValues() on the local
   sequential block; off-process rows are placed in the matrix stash to be
   communicated during MatAssemblyBegin()/MatAssemblyEnd() (unless stashing is
   disabled via donotstash).
*/
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative indices are ignored by convention */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* row-major input: row i of v is contiguous at v+i*n */
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          /* column-major input: entry (i,j) lives at v[i + j*m] */
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      /* off-process row: stash it and mark the matrix unassembled */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
147 
/*
   MatGetValues_MPIDense - Retrieves a block of values from the parallel dense
   matrix into v (row-major). Only locally owned rows may be requested; negative
   row/column indices are skipped (their slots in v are left untouched).
*/
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* global row -> local row of the sequential block */
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        /* output is row-major: entry (i,j) goes to v[i*n + j] */
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
169 
170 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
171 {
172   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
173   PetscErrorCode ierr;
174 
175   PetscFunctionBegin;
176   ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
177   PetscFunctionReturn(0);
178 }
179 
/*
   MatDenseSetLDA_MPIDense - Sets the leading dimension of the local dense block.

   If the inner sequential matrix has not been created yet (preallocation not
   done), it is created here first — sized to the local row layout and the full
   global column count, with a CUDA type when A is MATMPIDENSECUDA — so the LDA
   can be recorded before the values array exists.
*/
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->A) {
    if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
    ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->A);CHKERRQ(ierr);
    /* the local block holds all global columns for the locally owned rows */
    ierr = MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
    ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  }
  ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
200 
201 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
202 {
203   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
204   PetscErrorCode ierr;
205 
206   PetscFunctionBegin;
207   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
208   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
209   PetscFunctionReturn(0);
210 }
211 
212 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
213 {
214   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
215   PetscErrorCode ierr;
216 
217   PetscFunctionBegin;
218   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
219   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
220   PetscFunctionReturn(0);
221 }
222 
223 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
224 {
225   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
226   PetscErrorCode ierr;
227 
228   PetscFunctionBegin;
229   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
230   ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr);
231   PetscFunctionReturn(0);
232 }
233 
234 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
235 {
236   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
237   PetscErrorCode ierr;
238 
239   PetscFunctionBegin;
240   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
241   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
242   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
243   PetscFunctionReturn(0);
244 }
245 
246 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
247 {
248   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
249   PetscErrorCode ierr;
250 
251   PetscFunctionBegin;
252   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
253   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
254   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
255   PetscFunctionReturn(0);
256 }
257 
258 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
259 {
260   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
261   PetscErrorCode ierr;
262 
263   PetscFunctionBegin;
264   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
265   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
266   ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr);
267   PetscFunctionReturn(0);
268 }
269 
/*
   MatCreateSubMatrix_MPIDense - Extracts the submatrix A(isrow,iscol) into *B.

   iscol is gathered onto every process (the full column index set is needed
   because each process stores all columns of its rows). The row index set must
   select locally owned rows with the same row partitioning as A; no parallel
   redistribution is performed.
*/
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
  PetscErrorCode    ierr;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  /* every process needs the complete list of selected columns */
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr);
  /* NOTE(review): bv is filled contiguously, i.e. the target's lda is assumed
     to equal nrows — confirm for the MAT_REUSE_MATRIX path */
  for (i=0; i<Ncols; i++) {
    const PetscScalar *av = v + lda*icol[i]; /* start of source column icol[i] */
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart]; /* selected rows, translated to local indices */
    }
  }
  ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr);

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
339 
340 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
341 {
342   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
343   PetscErrorCode ierr;
344 
345   PetscFunctionBegin;
346   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
347   PetscFunctionReturn(0);
348 }
349 
350 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
351 {
352   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
353   PetscErrorCode ierr;
354 
355   PetscFunctionBegin;
356   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
357   PetscFunctionReturn(0);
358 }
359 
360 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
361 {
362   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
363   PetscErrorCode ierr;
364 
365   PetscFunctionBegin;
366   ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr);
367   PetscFunctionReturn(0);
368 }
369 
/*
   MatAssemblyBegin_MPIDense - Starts communicating stashed off-process entries.

   When stashing is disabled (donotstash) or the user promised no off-process
   entries (nooffprocentries), there is nothing to communicate and the routine
   returns immediately.
*/
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* start sending stashed entries to their owning processes (row partitioning) */
  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
384 
/*
   MatAssemblyEnd_MPIDense - Completes assembly: drains the stash messages
   started in MatAssemblyBegin_MPIDense(), inserts the received values, then
   assembles the local block and, on the first final assembly, builds the
   scatter context used by the matrix-vector products.
*/
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break; /* no more incoming messages */

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* first final assembly: set up lvec/Mvctx for MatMult and friends */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
423 
424 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
425 {
426   PetscErrorCode ierr;
427   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
428 
429   PetscFunctionBegin;
430   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
431   PetscFunctionReturn(0);
432 }
433 
/*
   MatZeroRows_MPIDense - Zeroes the given (global) rows, optionally placing
   diag on the diagonal and adjusting the right-hand side b so that the
   solution keeps x's values in the zeroed rows.
*/
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr);
    /* b_i = diag * x_i for each zeroed local row */
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr);
  }
  /* zero the rows in the local block (diagonal handled separately below) */
  ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (diag != 0.0) {
    Vec d;

    /* NOTE(review): MatDiagonalSet with a constant vector inserts diag on the
       ENTIRE diagonal, not just in the zeroed rows — confirm this is intended */
    ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
    ierr = VecSet(d,diag);CHKERRQ(ierr);
    ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecDestroy(&d);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
466 
467 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
468 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
469 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
470 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
471 
/*
   MatMult_MPIDense - yy = A * xx.

   Broadcasts the (distributed) input vector into the local work vector lvec
   via the star-forest scatter, then performs the local dense multiply. The
   memtype-aware SF calls allow host or device arrays transparently.
*/
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  ierr = VecGetArrayReadAndMemType(xx,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr);
  /* gather all of xx into the local vector lvec */
  ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
490 
/*
   MatMultAdd_MPIDense - zz = yy + A * xx.

   Same communication pattern as MatMult_MPIDense: broadcast xx into lvec,
   then delegate to the local block's multadd.
*/
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  ierr = VecGetArrayReadAndMemType(xx,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr);
  /* gather all of xx into the local vector lvec */
  ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
509 
/*
   MatMultTranspose_MPIDense - yy = A^T * xx.

   Each process applies the transpose of its local block to its rows of xx,
   producing a full-length partial result in lvec; the partial results are then
   summed across processes into yy with an SF reduce.
*/
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  ierr = VecSet(yy,0.0);CHKERRQ(ierr); /* reduce accumulates into yy */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(yy,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(yy,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
529 
/*
   MatMultTransposeAdd_MPIDense - zz = yy + A^T * xx.

   Seeds zz with yy, then accumulates the summed per-process partial transpose
   products into zz via an SF reduce (same pattern as MatMultTranspose).
*/
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr); /* reduce accumulates on top of yy */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(zz,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(zz,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
549 
550 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
551 {
552   Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
553   PetscErrorCode    ierr;
554   PetscInt          lda,len,i,n,m = A->rmap->n,radd;
555   PetscScalar       *x,zero = 0.0;
556   const PetscScalar *av;
557 
558   PetscFunctionBegin;
559   ierr = VecSet(v,zero);CHKERRQ(ierr);
560   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
561   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
562   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
563   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
564   radd = A->rmap->rstart*m;
565   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
566   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
567   for (i=0; i<len; i++) {
568     x[i] = av[radd + i*lda + i];
569   }
570   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
571   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
572   PetscFunctionReturn(0);
573 }
574 
/*
   MatDestroy_MPIDense - Releases all storage owned by the parallel dense
   matrix: the communication stash, the inner sequential block, the scatter
   work vector and star forest, the cached column vector/submatrix, and every
   composed method so no dangling function pointers remain on the base object.
*/
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  /* refuse to tear down while borrowed views are outstanding */
  if (mdn->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (mdn->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);

  /* remove every composed method installed at creation/convert time */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
#if defined (PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
632 
633 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
634 
635 #include <petscdraw.h>
/*
   MatView_MPIDense_ASCIIorDraworSocket - Views the parallel dense matrix with
   ASCII, draw, or socket viewers.

   Info formats are printed per process and return early; otherwise the whole
   matrix is gathered onto rank 0 (via a temporary matrix whose entire size is
   owned by rank 0) and viewed there with the sequential dense viewer.
*/
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    /* NOTE(review): vtype is fetched here but never used — candidate for removal */
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); /* rank 0 owns everything */
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* only rank 0 views; the sub-viewer keeps the collective semantics */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
718 
719 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
720 {
721   PetscErrorCode ierr;
722   PetscBool      iascii,isbinary,isdraw,issocket;
723 
724   PetscFunctionBegin;
725   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
726   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
727   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
728   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
729 
730   if (iascii || issocket || isdraw) {
731     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
732   } else if (isbinary) {
733     ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
734   }
735   PetscFunctionReturn(0);
736 }
737 
738 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
739 {
740   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
741   Mat            mdn  = mat->A;
742   PetscErrorCode ierr;
743   PetscLogDouble isend[5],irecv[5];
744 
745   PetscFunctionBegin;
746   info->block_size = 1.0;
747 
748   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
749 
750   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
751   isend[3] = info->memory;  isend[4] = info->mallocs;
752   if (flag == MAT_LOCAL) {
753     info->nz_used      = isend[0];
754     info->nz_allocated = isend[1];
755     info->nz_unneeded  = isend[2];
756     info->memory       = isend[3];
757     info->mallocs      = isend[4];
758   } else if (flag == MAT_GLOBAL_MAX) {
759     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
760 
761     info->nz_used      = irecv[0];
762     info->nz_allocated = irecv[1];
763     info->nz_unneeded  = irecv[2];
764     info->memory       = irecv[3];
765     info->mallocs      = irecv[4];
766   } else if (flag == MAT_GLOBAL_SUM) {
767     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
768 
769     info->nz_used      = irecv[0];
770     info->nz_allocated = irecv[1];
771     info->nz_unneeded  = irecv[2];
772     info->memory       = irecv[3];
773     info->mallocs      = irecv[4];
774   }
775   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
776   info->fill_ratio_needed = 0;
777   info->factor_mallocs    = 0;
778   PetscFunctionReturn(0);
779 }
780 
781 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
782 {
783   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
784   PetscErrorCode ierr;
785 
786   PetscFunctionBegin;
787   switch (op) {
788   case MAT_NEW_NONZERO_LOCATIONS:
789   case MAT_NEW_NONZERO_LOCATION_ERR:
790   case MAT_NEW_NONZERO_ALLOCATION_ERR:
791     MatCheckPreallocated(A,1);
792     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
793     break;
794   case MAT_ROW_ORIENTED:
795     MatCheckPreallocated(A,1);
796     a->roworiented = flg;
797     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
798     break;
799   case MAT_FORCE_DIAGONAL_ENTRIES:
800   case MAT_KEEP_NONZERO_PATTERN:
801   case MAT_USE_HASH_TABLE:
802   case MAT_SORTED_FULL:
803     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
804     break;
805   case MAT_IGNORE_OFF_PROC_ENTRIES:
806     a->donotstash = flg;
807     break;
808   case MAT_SYMMETRIC:
809   case MAT_STRUCTURALLY_SYMMETRIC:
810   case MAT_HERMITIAN:
811   case MAT_SYMMETRY_ETERNAL:
812   case MAT_IGNORE_LOWER_TRIANGULAR:
813   case MAT_IGNORE_ZERO_ENTRIES:
814     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
815     break;
816   default:
817     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
818   }
819   PetscFunctionReturn(0);
820 }
821 
822 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
823 {
824   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
825   const PetscScalar *l;
826   PetscScalar       x,*v,*vv,*r;
827   PetscErrorCode    ierr;
828   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;
829 
830   PetscFunctionBegin;
831   ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
832   ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
833   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
834   if (ll) {
835     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
836     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2);
837     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
838     for (i=0; i<m; i++) {
839       x = l[i];
840       v = vv + i;
841       for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
842     }
843     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
844     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
845   }
846   if (rr) {
847     const PetscScalar *ar;
848 
849     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
850     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
851     ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
852     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
853     ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
854     ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
855     ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr);
856     for (i=0; i<n; i++) {
857       x = r[i];
858       v = vv + i*lda;
859       for (j=0; j<m; j++) (*v++) *= x;
860     }
861     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
862     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
863   }
864   ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr);
865   PetscFunctionReturn(0);
866 }
867 
/* Computes a matrix norm of the parallel dense matrix A.
   Supported: NORM_FROBENIUS, NORM_1 (max column sum) and NORM_INFINITY
   (max row sum); the two norm is not supported. On a single process the
   computation is delegated entirely to the sequential block. */
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,j;
  PetscMPIInt       size;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr);
  v    = av;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  if (size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of |a_ij|^2 over the local block, then global sum and sqrt */
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      /* per-column absolute sums of the local rows, summed across
         processes, then the maximum over all global columns */
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = av;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not shared between processes, so the local infinity
         norms can simply be maxed across the communicator */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
916 
/* Forms the transpose of A by inserting each local column of A as a row of
   the result via MatSetValues() and assembling. For MAT_INITIAL_MATRIX and
   MAT_INPLACE_MATRIX a new matrix is created; for MAT_REUSE_MATRIX *matout
   is filled in place. In the in-place case the new matrix replaces A's
   internals through MatHeaderMerge(). */
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i,lda;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* create B with row/column layouts swapped relative to A */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else B = *matout;

  m    = a->A->rmap->n; n = a->A->cmap->n;
  ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  /* rwork holds the global row indices of this process's rows of A,
     which become the column indices in B */
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    /* local column j of A (contiguous, length m) becomes global row j of B */
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += lda;
  }
  ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: move B's data into A and destroy the temporary header */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
954 
955 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
956 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
957 
958 PetscErrorCode MatSetUp_MPIDense(Mat A)
959 {
960   PetscErrorCode ierr;
961 
962   PetscFunctionBegin;
963   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
964   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
965   if (!A->preallocated) {
966     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
967   }
968   PetscFunctionReturn(0);
969 }
970 
971 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
972 {
973   PetscErrorCode ierr;
974   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
975 
976   PetscFunctionBegin;
977   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
978   PetscFunctionReturn(0);
979 }
980 
981 PetscErrorCode MatConjugate_MPIDense(Mat mat)
982 {
983   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
984   PetscErrorCode ierr;
985 
986   PetscFunctionBegin;
987   ierr = MatConjugate(a->A);CHKERRQ(ierr);
988   PetscFunctionReturn(0);
989 }
990 
991 PetscErrorCode MatRealPart_MPIDense(Mat A)
992 {
993   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
994   PetscErrorCode ierr;
995 
996   PetscFunctionBegin;
997   ierr = MatRealPart(a->A);CHKERRQ(ierr);
998   PetscFunctionReturn(0);
999 }
1000 
1001 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1002 {
1003   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1004   PetscErrorCode ierr;
1005 
1006   PetscFunctionBegin;
1007   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1008   PetscFunctionReturn(0);
1009 }
1010 
1011 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
1012 {
1013   PetscErrorCode ierr;
1014   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1015 
1016   PetscFunctionBegin;
1017   if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
1018   if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
1019   ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
1020   PetscFunctionReturn(0);
1021 }
1022 
1023 PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1024 
1025 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1026 {
1027   PetscErrorCode ierr;
1028   PetscInt       i,n;
1029   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1030   PetscReal      *work;
1031 
1032   PetscFunctionBegin;
1033   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1034   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1035   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1036   if (type == NORM_2) {
1037     for (i=0; i<n; i++) work[i] *= work[i];
1038   }
1039   if (type == NORM_INFINITY) {
1040     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1041   } else {
1042     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1043   }
1044   ierr = PetscFree(work);CHKERRQ(ierr);
1045   if (type == NORM_2) {
1046     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1047   }
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 #if defined(PETSC_HAVE_CUDA)
1052 static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1053 {
1054   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1055   PetscErrorCode ierr;
1056   PetscInt       lda;
1057 
1058   PetscFunctionBegin;
1059   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1060   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1061   if (!a->cvec) {
1062     ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1063     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
1064   }
1065   a->vecinuse = col + 1;
1066   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1067   ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1068   ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1069   *v   = a->cvec;
1070   PetscFunctionReturn(0);
1071 }
1072 
1073 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1074 {
1075   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1076   PetscErrorCode ierr;
1077 
1078   PetscFunctionBegin;
1079   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1080   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1081   a->vecinuse = 0;
1082   ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1083   ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
1084   *v   = NULL;
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1089 {
1090   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1091   PetscInt       lda;
1092   PetscErrorCode ierr;
1093 
1094   PetscFunctionBegin;
1095   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1096   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1097   if (!a->cvec) {
1098     ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1099     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
1100   }
1101   a->vecinuse = col + 1;
1102   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1103   ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1104   ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1105   ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
1106   *v   = a->cvec;
1107   PetscFunctionReturn(0);
1108 }
1109 
1110 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1111 {
1112   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1113   PetscErrorCode ierr;
1114 
1115   PetscFunctionBegin;
1116   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1117   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1118   a->vecinuse = 0;
1119   ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1120   ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
1121   ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
1122   *v   = NULL;
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1127 {
1128   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1129   PetscErrorCode ierr;
1130   PetscInt       lda;
1131 
1132   PetscFunctionBegin;
1133   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1134   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1135   if (!a->cvec) {
1136     ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1137     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
1138   }
1139   a->vecinuse = col + 1;
1140   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1141   ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1142   ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1143   *v   = a->cvec;
1144   PetscFunctionReturn(0);
1145 }
1146 
1147 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1148 {
1149   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1150   PetscErrorCode ierr;
1151 
1152   PetscFunctionBegin;
1153   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1154   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1155   a->vecinuse = 0;
1156   ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1157   ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
1158   *v   = NULL;
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1163 {
1164   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1165   PetscErrorCode ierr;
1166 
1167   PetscFunctionBegin;
1168   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1169   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1170   ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
1171   PetscFunctionReturn(0);
1172 }
1173 
1174 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1175 {
1176   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1177   PetscErrorCode ierr;
1178 
1179   PetscFunctionBegin;
1180   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1181   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1182   ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
1183   PetscFunctionReturn(0);
1184 }
1185 
1186 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1187 {
1188   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1189   PetscErrorCode ierr;
1190 
1191   PetscFunctionBegin;
1192   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1193   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1194   ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr);
1195   PetscFunctionReturn(0);
1196 }
1197 
1198 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1199 {
1200   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1201   PetscErrorCode ierr;
1202 
1203   PetscFunctionBegin;
1204   ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
1205   PetscFunctionReturn(0);
1206 }
1207 
1208 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1209 {
1210   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1211   PetscErrorCode ierr;
1212 
1213   PetscFunctionBegin;
1214   ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1219 {
1220   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1221   PetscErrorCode ierr;
1222 
1223   PetscFunctionBegin;
1224   ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1229 {
1230   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1231   PetscErrorCode ierr;
1232 
1233   PetscFunctionBegin;
1234   ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1239 {
1240   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1241   PetscErrorCode ierr;
1242 
1243   PetscFunctionBegin;
1244   ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
1245   PetscFunctionReturn(0);
1246 }
1247 
1248 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1249 {
1250   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1251   PetscErrorCode ierr;
1252 
1253   PetscFunctionBegin;
1254   ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1259 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1260 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
1261 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1262 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1263 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
1264 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);
1265 
/* Binds (or unbinds) a MATMPIDENSECUDA matrix to the CPU: propagates the
   setting to the local block and to the cached submatrix, and swaps the
   column-vector accessors between the CUDA and host implementations.
   When switching to the GPU path, cached helper objects of the wrong
   (host) type are destroyed so they get recreated lazily. */
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* cannot rebind while a column vector or submatrix aliases the storage */
  if (d->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (d->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* drop cached helpers that are not CUDA-typed; they are recreated lazily */
    ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
    }
    ierr = PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = MatDestroy(&d->cmat);CHKERRQ(ierr);
    }
    /* install the CUDA column-vector accessors */
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
  } else {
    /* install the host column-vector accessors */
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  }
  if (d->cmat) {
    ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1308 
/* Preallocates the device storage of a MATMPIDENSECUDA matrix.
   d_data may be NULL (PETSc allocates) or a user-provided device array
   that becomes the local block's storage. A no-op for non-CUDA types. */
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  if (!iscuda) PetscFunctionReturn(0);
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  /* lazily create the local block: local rows by ALL global columns */
  if (!d->A) {
    ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
    ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
  }
  ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
  ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1331 #endif
1332 
1333 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1334 {
1335   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1336   PetscErrorCode ierr;
1337 
1338   PetscFunctionBegin;
1339   ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
1340   PetscFunctionReturn(0);
1341 }
1342 
/* Dense matrices store every entry, so a diagonal entry can never be
   missing; *missing is always PETSC_FALSE and the location output d is
   left untouched. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1349 
1350 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1351 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1352 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1353 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1354 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
1355 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);
1356 
1357 /* -------------------------------------------------------------------*/
1358 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1359                                         MatGetRow_MPIDense,
1360                                         MatRestoreRow_MPIDense,
1361                                         MatMult_MPIDense,
1362                                 /*  4*/ MatMultAdd_MPIDense,
1363                                         MatMultTranspose_MPIDense,
1364                                         MatMultTransposeAdd_MPIDense,
1365                                         NULL,
1366                                         NULL,
1367                                         NULL,
1368                                 /* 10*/ NULL,
1369                                         NULL,
1370                                         NULL,
1371                                         NULL,
1372                                         MatTranspose_MPIDense,
1373                                 /* 15*/ MatGetInfo_MPIDense,
1374                                         MatEqual_MPIDense,
1375                                         MatGetDiagonal_MPIDense,
1376                                         MatDiagonalScale_MPIDense,
1377                                         MatNorm_MPIDense,
1378                                 /* 20*/ MatAssemblyBegin_MPIDense,
1379                                         MatAssemblyEnd_MPIDense,
1380                                         MatSetOption_MPIDense,
1381                                         MatZeroEntries_MPIDense,
1382                                 /* 24*/ MatZeroRows_MPIDense,
1383                                         NULL,
1384                                         NULL,
1385                                         NULL,
1386                                         NULL,
1387                                 /* 29*/ MatSetUp_MPIDense,
1388                                         NULL,
1389                                         NULL,
1390                                         MatGetDiagonalBlock_MPIDense,
1391                                         NULL,
1392                                 /* 34*/ MatDuplicate_MPIDense,
1393                                         NULL,
1394                                         NULL,
1395                                         NULL,
1396                                         NULL,
1397                                 /* 39*/ MatAXPY_MPIDense,
1398                                         MatCreateSubMatrices_MPIDense,
1399                                         NULL,
1400                                         MatGetValues_MPIDense,
1401                                         MatCopy_MPIDense,
1402                                 /* 44*/ NULL,
1403                                         MatScale_MPIDense,
1404                                         MatShift_Basic,
1405                                         NULL,
1406                                         NULL,
1407                                 /* 49*/ MatSetRandom_MPIDense,
1408                                         NULL,
1409                                         NULL,
1410                                         NULL,
1411                                         NULL,
1412                                 /* 54*/ NULL,
1413                                         NULL,
1414                                         NULL,
1415                                         NULL,
1416                                         NULL,
1417                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1418                                         MatDestroy_MPIDense,
1419                                         MatView_MPIDense,
1420                                         NULL,
1421                                         NULL,
1422                                 /* 64*/ NULL,
1423                                         NULL,
1424                                         NULL,
1425                                         NULL,
1426                                         NULL,
1427                                 /* 69*/ NULL,
1428                                         NULL,
1429                                         NULL,
1430                                         NULL,
1431                                         NULL,
1432                                 /* 74*/ NULL,
1433                                         NULL,
1434                                         NULL,
1435                                         NULL,
1436                                         NULL,
1437                                 /* 79*/ NULL,
1438                                         NULL,
1439                                         NULL,
1440                                         NULL,
1441                                 /* 83*/ MatLoad_MPIDense,
1442                                         NULL,
1443                                         NULL,
1444                                         NULL,
1445                                         NULL,
1446                                         NULL,
1447                                 /* 89*/ NULL,
1448                                         NULL,
1449                                         NULL,
1450                                         NULL,
1451                                         NULL,
1452                                 /* 94*/ NULL,
1453                                         NULL,
1454                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1455                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1456                                         NULL,
1457                                 /* 99*/ MatProductSetFromOptions_MPIDense,
1458                                         NULL,
1459                                         NULL,
1460                                         MatConjugate_MPIDense,
1461                                         NULL,
1462                                 /*104*/ NULL,
1463                                         MatRealPart_MPIDense,
1464                                         MatImaginaryPart_MPIDense,
1465                                         NULL,
1466                                         NULL,
1467                                 /*109*/ NULL,
1468                                         NULL,
1469                                         NULL,
1470                                         MatGetColumnVector_MPIDense,
1471                                         MatMissingDiagonal_MPIDense,
1472                                 /*114*/ NULL,
1473                                         NULL,
1474                                         NULL,
1475                                         NULL,
1476                                         NULL,
1477                                 /*119*/ NULL,
1478                                         NULL,
1479                                         NULL,
1480                                         NULL,
1481                                         NULL,
1482                                 /*124*/ NULL,
1483                                         MatGetColumnNorms_MPIDense,
1484                                         NULL,
1485                                         NULL,
1486                                         NULL,
1487                                 /*129*/ NULL,
1488                                         NULL,
1489                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1490                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1491                                         NULL,
1492                                 /*134*/ NULL,
1493                                         NULL,
1494                                         NULL,
1495                                         NULL,
1496                                         NULL,
1497                                 /*139*/ NULL,
1498                                         NULL,
1499                                         NULL,
1500                                         NULL,
1501                                         NULL,
1502                                         MatCreateMPIMatConcatenateSeqMat_MPIDense,
1503                                 /*145*/ NULL,
1504                                         NULL,
1505                                         NULL
1506 };
1507 
/*
   MatMPIDenseSetPreallocation_MPIDense - Implementation of MatMPIDenseSetPreallocation() for MATMPIDENSE.

   Creates (if not yet present) and preallocates the inner sequential dense matrix a->A that
   stores this process's rows of the global matrix.  data, when non-NULL, is a user-provided
   array used as the storage (see MatSeqDenseSetPreallocation()).
*/
PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* a checked-out submatrix view (MatDenseGetSubMatrix()) borrows a->A; refuse to repreallocate under it */
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  if (!a->A) {
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
    /* the local block holds all N global columns of this process's mat->rmap->n rows */
    ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  }
  /* the inner sequential type mirrors the outer MPI type (CUDA vs host) */
  ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  mat->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1529 
#if defined(PETSC_HAVE_ELEMENTAL)
/*
   MatConvert_MPIDense_Elemental - Converts a MATMPIDENSE matrix to MATELEMENTAL by
   inserting each process's local rows (all N columns) into a new Elemental matrix.

   Fix: A is only read here, so use MatDenseGetArrayRead()/MatDenseRestoreArrayRead()
   instead of the read-write accessors; this avoids flagging A as modified.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat               mat_elemental;
  PetscErrorCode    ierr;
  const PetscScalar *v;   /* read-only view of A's local storage */
  PetscInt          m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    /* A's local storage is column major; tell Elemental values arrive column oriented */
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArrayRead(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif
1570 
/*
   MatDenseGetColumn_MPIDense - Gives direct pointer access to column col of the local
   dense block; simply forwards to MatDenseGetColumn() on the inner sequential matrix.
   Must be matched by MatDenseRestoreColumn_MPIDense().
*/
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1580 
/*
   MatDenseRestoreColumn_MPIDense - Releases the column pointer obtained with
   MatDenseGetColumn_MPIDense(); forwards to the inner sequential matrix.
*/
static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1590 
1591 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1592 {
1593   PetscErrorCode ierr;
1594   Mat_MPIDense   *mat;
1595   PetscInt       m,nloc,N;
1596 
1597   PetscFunctionBegin;
1598   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
1599   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
1600   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1601     PetscInt sum;
1602 
1603     if (n == PETSC_DECIDE) {
1604       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1605     }
1606     /* Check sum(n) = N */
1607     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1608     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1609 
1610     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
1611   }
1612 
1613   /* numeric phase */
1614   mat = (Mat_MPIDense*)(*outmat)->data;
1615   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1616   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1617   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1618   PetscFunctionReturn(0);
1619 }
1620 
1621 #if defined(PETSC_HAVE_CUDA)
/*
   MatConvert_MPIDenseCUDA_MPIDense - Converts a MATMPIDENSECUDA matrix to MATMPIDENSE.

   Binds the matrix to the CPU, restores the host default vector type, removes the
   CUDA-specific composed methods, and converts the inner sequential block to MATSEQDENSE.
*/
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* MAT_INPLACE_MATRIX: nothing copied; *newmat is expected to alias M and is converted in place */

  B    = *newmat;
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
  /* drop the CUDA-only composed methods by composing NULL over each of them */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* bring the inner sequential block back to the host type and rebuild the mult machinery */
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}
1663 
/*
   MatConvert_MPIDense_MPIDenseCUDA - Converts a MATMPIDENSE matrix to MATMPIDENSECUDA.

   Sets the CUDA default vector type, composes the CUDA-specific methods, converts the
   inner sequential block to MATSEQDENSECUDA, and unbinds the matrix from the CPU.
*/
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* MAT_INPLACE_MATRIX: nothing copied; *newmat is expected to alias M and is converted in place */

  B    = *newmat;
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  /* install the CUDA-specific implementations of the composed methods */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    /* no local block yet (not preallocated): there is no data on either host or device */
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
1708 #endif
1709 
/*
   MatDenseGetColumnVec_MPIDense - Gives read/write access to column col of the matrix as
   an MPI Vec.

   A single internal vector a->cvec is lazily created and reused across calls; its array is
   pointed at column col of the local dense block via VecPlaceArray().  Must be matched by
   MatDenseRestoreColumnVec_MPIDense(); only one column (or submatrix) may be checked out at
   a time.
*/
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* created with a NULL array; a real array is placed below on each checkout */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* store col+1 so that 0 means "not checked out" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  /* columns are lda entries apart in the column-major local storage */
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1729 
/*
   MatDenseRestoreColumnVec_MPIDense - Returns the column vector obtained with
   MatDenseGetColumnVec_MPIDense(), detaching its placed array and releasing the
   local dense storage.
*/
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL; /* invalidate the caller's handle; the internal vector stays cached */
  PetscFunctionReturn(0);
}
1744 
/*
   MatDenseGetColumnVecRead_MPIDense - Gives read-only access to column col of the matrix
   as an MPI Vec; the returned vector is read-locked until the matching restore call.

   Same reusable internal vector as MatDenseGetColumnVec_MPIDense(); must be matched by
   MatDenseRestoreColumnVecRead_MPIDense().
*/
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* created with a NULL array; a real array is placed below on each checkout */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* col+1 so that 0 means "not checked out" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  /* read-only array access: the matrix is not marked as modified */
  ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr); /* forbid writes through the returned vector */
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1765 
1766 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1767 {
1768   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1769   PetscErrorCode ierr;
1770 
1771   PetscFunctionBegin;
1772   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1773   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1774   a->vecinuse = 0;
1775   ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1776   ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
1777   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1778   *v   = NULL;
1779   PetscFunctionReturn(0);
1780 }
1781 
/*
   MatDenseGetColumnVecWrite_MPIDense - Gives write-only access to column col of the matrix
   as an MPI Vec (existing values need not be valid on checkout).

   Same reusable internal vector as MatDenseGetColumnVec_MPIDense(); must be matched by
   MatDenseRestoreColumnVecWrite_MPIDense().
*/
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* created with a NULL array; a real array is placed below on each checkout */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* col+1 so that 0 means "not checked out" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1801 
1802 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1803 {
1804   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1805   PetscErrorCode ierr;
1806 
1807   PetscFunctionBegin;
1808   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1809   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1810   a->vecinuse = 0;
1811   ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1812   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1813   *v   = NULL;
1814   PetscFunctionReturn(0);
1815 }
1816 
/*
   MatDenseGetSubMatrix_MPIDense - Returns a matrix view of columns [cbegin, cend) of A.

   The view a->cmat is cached on A and reused across calls; its column layout is rebuilt
   whenever the requested width changes.  The local block of the view is borrowed from a->A
   with MatDenseGetSubMatrix().  Must be matched by MatDenseRestoreSubMatrix_MPIDense();
   only one submatrix (or column vector) may be checked out at a time.
*/
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscBool      setup = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (a->vecinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cmat) {
    /* first checkout: create the cached view with A's row layout and cend-cbegin global columns */
    setup = PETSC_TRUE;
    ierr = MatCreate(comm,&a->cmat);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr);
    ierr = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  } else if (cend-cbegin != a->cmat->cmap->N) {
    /* cached view has a different width: rebuild only its column layout */
    setup = PETSC_TRUE;
    ierr = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  }
  c = (Mat_MPIDense*)a->cmat->data;
  if (c->A) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr);
  if (setup) { /* do we really need this? */
    ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr);
  }
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled = PETSC_TRUE;
  a->matinuse = cbegin + 1; /* cbegin+1 so that 0 means "not checked out" */
  *v = a->cmat;
  PetscFunctionReturn(0);
}
1856 
/*
   MatDenseRestoreSubMatrix_MPIDense - Returns the submatrix view obtained with
   MatDenseGetSubMatrix_MPIDense(), giving the borrowed local block back to a->A.
*/
PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
  if (!a->cmat) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
  if (*v != a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c    = (Mat_MPIDense*)a->cmat->data;
  ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr);
  *v   = NULL; /* invalidate the caller's handle; the internal view stays cached */
  PetscFunctionReturn(0);
}
1873 
/*
   MatCreate_MPIDense - Constructor for the MATMPIDENSE matrix type.

   Allocates the type-specific data, installs the function table, creates the stash used to
   buffer off-process MatSetValues() entries, and composes the dense-specific methods that
   the generic MatDenseXXX() interfaces dispatch to.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* dense-specific methods reachable through PetscTryMethod()/PetscUseMethod() */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr);
  /* conversions to external packages, compiled in only when configured */
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#endif

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1937 
1938 /*MC
1939    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1940 
1941    Options Database Keys:
1942 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1943 
1944   Level: beginner
1945 
.seealso: MATDENSE, MATDENSECUDA, MATMPIDENSE, MATSEQDENSECUDA
1947 
1948 M*/
1949 #if defined(PETSC_HAVE_CUDA)
/*
   MatCreate_MPIDenseCUDA - Constructor for the MATMPIDENSECUDA matrix type: builds a host
   MATMPIDENSE matrix and converts it in place to the CUDA type.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscCUDAInitializeCheck();CHKERRQ(ierr); /* fail early if CUDA cannot be initialized */
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1960 #endif
1961 
1962 /*MC
1963    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1964 
1965    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1966    and MATMPIDENSE otherwise.
1967 
1968    Options Database Keys:
1969 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1970 
1971   Level: beginner
1972 
1973 
1974 .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
1975 M*/
1976 
1977 /*MC
1978    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
1979 
1980    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
1981    and MATMPIDENSECUDA otherwise.
1982 
1983    Options Database Keys:
1984 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
1985 
1986   Level: beginner
1987 
1988 .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
1989 M*/
1990 
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation, if the matrix type provides one */
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2022 
/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  /* the matrix entries have (potentially) changed */
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the placed array lives on the host, so the host copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
2056 
2057 /*@
2058    MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()
2059 
2060    Not Collective
2061 
2062    Input Parameters:
2063 .  mat - the matrix
2064 
2065    Notes:
2066    You can only call this after a call to MatDensePlaceArray()
2067 
2068    Level: developer
2069 
2070 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
2071 
2072 @*/
PetscErrorCode  MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* restore the array that was saved by a previous MatDensePlaceArray() */
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2083 
2084 /*@
2085    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2086    array provided by the user. This is useful to avoid copying an array
2087    into a matrix
2088 
2089    Not Collective
2090 
2091    Input Parameters:
2092 +  mat - the matrix
2093 -  array - the array in column major order
2094 
2095    Notes:
2096    The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
2097    freed by the user. It will be freed when the matrix is destroyed.
2098 
2099    Level: developer
2100 
2101 .seealso: MatDenseGetArray(), VecReplaceArray()
2102 @*/
PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* permanently replace the array; ownership of 'array' transfers to the matrix */
  ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the replacement host array becomes the authoritative copy of the data */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
2116 
2117 #if defined(PETSC_HAVE_CUDA)
2118 /*@C
2119    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
2120    array provided by the user. This is useful to avoid copying an array
2121    into a matrix
2122 
2123    Not Collective
2124 
2125    Input Parameters:
2126 +  mat - the matrix
2127 -  array - the array in column major order
2128 
2129    Notes:
2130    You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
2131    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
2132 
2133    Level: developer
2134 
2135 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
2136 @*/
PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* PetscUseMethod() errors if the matrix type does not implement this operation */
  ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* the placed device array becomes the authoritative copy of the data */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2148 
2149 /*@C
   MatDenseCUDAResetArray - Resets the matrix array to that which it previously had before the call to MatDenseCUDAPlaceArray()
2151 
2152    Not Collective
2153 
2154    Input Parameters:
2155 .  mat - the matrix
2156 
2157    Notes:
2158    You can only call this after a call to MatDenseCUDAPlaceArray()
2159 
2160    Level: developer
2161 
2162 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()
2163 
2164 @*/
PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* restore the device array saved by a previous MatDenseCUDAPlaceArray() */
  ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2175 
2176 /*@C
2177    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
2178    array provided by the user. This is useful to avoid copying an array
2179    into a matrix
2180 
2181    Not Collective
2182 
2183    Input Parameters:
2184 +  mat - the matrix
2185 -  array - the array in column major order
2186 
2187    Notes:
2188    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2189    The memory passed in CANNOT be freed by the user. It will be freed
2190    when the matrix is destroyed. The array should respect the matrix leading dimension.
2191 
2192    Level: developer
2193 
2194 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
2195 @*/
PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* permanently replace the device array; ownership of 'array' transfers to the matrix */
  ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* the replacement device array becomes the authoritative copy of the data */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2207 
2208 /*@C
2209    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.
2210 
2211    Not Collective
2212 
2213    Input Parameters:
2214 .  A - the matrix
2215 
   Output Parameter:
2217 .  array - the GPU array in column major order
2218 
2219    Notes:
2220    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
2221 
2222    Level: developer
2223 
2224 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
2225 @*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* write-only access: no host-to-device copy is implied by this call */
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  /* the caller will modify the values, so bump the object state */
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2236 
2237 /*@C
2238    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().
2239 
2240    Not Collective
2241 
2242    Input Parameters:
2243 +  A - the matrix
2244 -  array - the GPU array in column major order
2245 
2246    Notes:
2247 
2248    Level: developer
2249 
2250 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2251 @*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  /* the device copy was written and is now the authoritative one */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2263 
2264 /*@C
2265    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.
2266 
2267    Not Collective
2268 
2269    Input Parameters:
2270 .  A - the matrix
2271 
   Output Parameter:
2273 .  array - the GPU array in column major order
2274 
2275    Notes:
2276    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2277 
2278    Level: developer
2279 
2280 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2281 @*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* read-only access: no object-state increase since the values are not modified */
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2291 
2292 /*@C
2293    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2294 
2295    Not Collective
2296 
2297    Input Parameters:
2298 +  A - the matrix
2299 -  array - the GPU array in column major order
2300 
2301    Notes:
2302    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2303 
2304    Level: developer
2305 
2306 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
2307 @*/
2308 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2309 {
2310   PetscErrorCode ierr;
2311 
2312   PetscFunctionBegin;
2313   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2314   PetscFunctionReturn(0);
2315 }
2316 
2317 /*@C
2318    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.
2319 
2320    Not Collective
2321 
2322    Input Parameters:
2323 .  A - the matrix
2324 
   Output Parameter:
2326 .  array - the GPU array in column major order
2327 
2328    Notes:
2329    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().
2330 
2331    Level: developer
2332 
2333 .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2334 @*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* read-write access to the device buffer */
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  /* the caller may modify the values, so bump the object state */
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2345 
2346 /*@C
2347    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().
2348 
2349    Not Collective
2350 
2351    Input Parameters:
2352 +  A - the matrix
2353 -  array - the GPU array in column major order
2354 
2355    Notes:
2356 
2357    Level: developer
2358 
2359 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2360 @*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  /* the device copy may have been written and is now the authoritative one */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2372 #endif
2373 
2374 /*@C
2375    MatCreateDense - Creates a matrix in dense format.
2376 
2377    Collective
2378 
2379    Input Parameters:
2380 +  comm - MPI communicator
2381 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2382 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2383 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2384 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2385 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2386    to control all matrix memory allocation.
2387 
2388    Output Parameter:
2389 .  A - the matrix
2390 
2391    Notes:
2392    The dense format is fully compatible with standard Fortran 77
2393    storage by columns.
2394 
2395    The data input variable is intended primarily for Fortran programmers
2396    who wish to allocate their own matrix memory space.  Most users should
2397    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
2398 
2399    The user MUST specify either the local or global matrix dimensions
2400    (possibly both).
2401 
2402    Level: intermediate
2403 
2404 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
2405 @*/
PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size > 1) {
    PetscBool havedata = (PetscBool)!!data;

    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    /* individual ranks may pass data=NULL; the logical-OR allreduce makes the
       decision to mark the matrix assembled collective over the communicator */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr);
    if (havedata) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    /* single-process communicator: fall back to the sequential dense type */
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2431 
2432 #if defined(PETSC_HAVE_CUDA)
2433 /*@C
2434    MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.
2435 
2436    Collective
2437 
2438    Input Parameters:
2439 +  comm - MPI communicator
2440 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2441 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2442 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2443 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2444 -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
2445    to control matrix memory allocation.
2446 
2447    Output Parameter:
2448 .  A - the matrix
2449 
2450    Notes:
2451 
2452    Level: intermediate
2453 
2454 .seealso: MatCreate(), MatCreateDense()
2455 @*/
PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  /* unlike MatCreateDense(), 'data' must be NULL on all ranks or non-NULL on all
     ranks: this collective check replaces the allreduce used in the host version */
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
    ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    /* single-process communicator: fall back to the sequential dense CUDA type */
    ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
    ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2479 #endif
2480 
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = NULL;
  /* create an empty matrix of the same type and global/local layout as A */
  ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIDense*)mat->data;

  /* the duplicate is created fully set up; no assembly phase is needed */
  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* share the row/column layouts by reference rather than copying them */
  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  /* duplicate the local dense block (values copied or not per cpvalues) */
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}
2511 
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
#endif
  /* dispatch on viewer type; only binary and (optionally) HDF5 are supported */
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}
2538 
2539 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
2540 {
2541   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2542   Mat            a,b;
2543   PetscBool      flg;
2544   PetscErrorCode ierr;
2545 
2546   PetscFunctionBegin;
2547   a    = matA->A;
2548   b    = matB->A;
2549   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2550   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2551   PetscFunctionReturn(0);
2552 }
2553 
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode        ierr;
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  /* release the communication buffers and intermediate product allocated at symbolic time */
  ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
  ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2565 
PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode        ierr;
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  /* release the ping-pong buffers and the allgatherv count/displacement arrays */
  ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
  ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
  ierr = PetscFree(abt);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2577 
/* Numeric phase of C = A^T * B for MPIDense A, B: each rank computes the full
   (global-column x global-column) product of its local blocks, then the
   per-rank partial products are summed and scattered so each rank ends up
   with its owned rows of C. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts;
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  atb = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf = atb->sendbuf;

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);

  /* arrange atbarray into sendbuf: pack the column-major local product so that
     the rows destined for each rank are contiguous (the layout MPI_Reduce_scatter needs) */
  ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);

  /* sum all atbarray to local values of C */
  ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRMPI(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2622 
/* Symbolic phase of C = A^T * B for MPIDense A, B: sets the sizes/type of C and
   allocates the send buffer and reduce-scatter counts reused by every numeric call. */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool             cisdense;
  PetscInt              i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* A and B must share the same row distribution for the local products to line up */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product C */
  ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
  if (!cisdense) {
    ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
  }
  ierr = MatSetUp(C);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = C->rmap->N;
  /* the send buffer holds the full global product; size_t casts avoid PetscInt overflow */
  ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  /* each rank receives its owned rows times all columns from the reduce-scatter */
  for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2662 
/* Symbolic phase of C = A * B^T for MPIDense A, B: sets up C and allocates the
   buffers for the algorithm selected via the product's "alg" option
   ("allgatherv" gathers all of B^T, "cyclic" pipelines B around the ring). */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A and B */
  if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);

  /* alg 0 = allgatherv, alg 1 = cyclic */
  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  alg  = flg ? 0 : 1;

  /* setup matrix product C */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  /* a dedicated tag keeps the cyclic sends/recvs from colliding with other traffic */
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" — two buffers sized for the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" — send buffer for local B^T, receive buffer for all of B^T */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data    = abt;
  C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2715 
/* Numeric phase of C = A * B^T using a ring pipeline: the transposed local
   block of B circulates around the communicator while each rank multiplies
   its local A against whichever block it currently holds, overlapping the
   next communication step with the local GEMM. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=NULL, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda = 0, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  = (Mat_MatTransMultDense*)C->product->data;
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  /* if the local block of B is contiguous (lda == local rows) it can be sent
     in place; otherwise compact it into the first staging buffer */
  if (blda == bn) {
    sendbuf = (PetscScalar*)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  /* ring neighbors: send toward rank-1, receive from rank+1 (modulo size) */
  if (size > 1) {
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRMPI(ierr);
      ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRMPI(ierr);
    }

    /* local aseq * sendbuf^T, written into the columns of C owned by recvisfrom */
    ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2801 
2802 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2803 {
2804   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2805   Mat_MatTransMultDense *abt;
2806   PetscErrorCode        ierr;
2807   MPI_Comm              comm;
2808   PetscMPIInt           size;
2809   PetscScalar           *cv, *sendbuf, *recvbuf;
2810   const PetscScalar     *av,*bv;
2811   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2812   PetscScalar           _DOne=1.0,_DZero=0.0;
2813   PetscBLASInt          cm, cn, ck, alda, clda;
2814 
2815   PetscFunctionBegin;
2816   MatCheckProduct(C,3);
2817   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2818   abt  = (Mat_MatTransMultDense*)C->product->data;
2819   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2820   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
2821   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
2822   ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
2823   ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
2824   ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
2825   ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
2826   ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr);
2827   ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
2828   ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
2829   /* copy transpose of B into buf[0] */
2830   bn      = B->rmap->n;
2831   sendbuf = abt->buf[0];
2832   recvbuf = abt->buf[1];
2833   for (k = 0, j = 0; j < bn; j++) {
2834     for (i = 0; i < cK; i++, k++) {
2835       sendbuf[k] = bv[i * blda + j];
2836     }
2837   }
2838   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2839   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRMPI(ierr);
2840   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2841   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2842   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
2843   if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2844   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
2845   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2846   ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
2847   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2848   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2849   PetscFunctionReturn(0);
2850 }
2851 
2852 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2853 {
2854   Mat_MatTransMultDense *abt;
2855   PetscErrorCode        ierr;
2856 
2857   PetscFunctionBegin;
2858   MatCheckProduct(C,3);
2859   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2860   abt = (Mat_MatTransMultDense*)C->product->data;
2861   switch (abt->alg) {
2862   case 1:
2863     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2864     break;
2865   default:
2866     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2867     break;
2868   }
2869   PetscFunctionReturn(0);
2870 }
2871 
2872 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2873 {
2874   PetscErrorCode   ierr;
2875   Mat_MatMultDense *ab = (Mat_MatMultDense*)data;
2876 
2877   PetscFunctionBegin;
2878   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2879   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2880   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2881   ierr = PetscFree(ab);CHKERRQ(ierr);
2882   PetscFunctionReturn(0);
2883 }
2884 
2885 #if defined(PETSC_HAVE_ELEMENTAL)
2886 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2887 {
2888   PetscErrorCode   ierr;
2889   Mat_MatMultDense *ab;
2890 
2891   PetscFunctionBegin;
2892   MatCheckProduct(C,3);
2893   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
2894   ab   = (Mat_MatMultDense*)C->product->data;
2895   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
2896   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
2897   ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
2898   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
2899   PetscFunctionReturn(0);
2900 }
2901 
2902 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2903 {
2904   PetscErrorCode   ierr;
2905   Mat              Ae,Be,Ce;
2906   Mat_MatMultDense *ab;
2907 
2908   PetscFunctionBegin;
2909   MatCheckProduct(C,4);
2910   if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2911   /* check local size of A and B */
2912   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2913     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2914   }
2915 
2916   /* create elemental matrices Ae and Be */
2917   ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr);
2918   ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2919   ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr);
2920   ierr = MatSetUp(Ae);CHKERRQ(ierr);
2921   ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2922 
2923   ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr);
2924   ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
2925   ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
2926   ierr = MatSetUp(Be);CHKERRQ(ierr);
2927   ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2928 
2929   /* compute symbolic Ce = Ae*Be */
2930   ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr);
2931   ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr);
2932 
2933   /* setup C */
2934   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2935   ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr);
2936   ierr = MatSetUp(C);CHKERRQ(ierr);
2937 
2938   /* create data structure for reuse Cdense */
2939   ierr = PetscNew(&ab);CHKERRQ(ierr);
2940   ab->Ae = Ae;
2941   ab->Be = Be;
2942   ab->Ce = Ce;
2943 
2944   C->product->data    = ab;
2945   C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
2946   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2947   PetscFunctionReturn(0);
2948 }
2949 #endif
2950 /* ----------------------------------------------- */
2951 #if defined(PETSC_HAVE_ELEMENTAL)
2952 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2953 {
2954   PetscFunctionBegin;
2955   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2956   C->ops->productsymbolic = MatProductSymbolic_AB;
2957   PetscFunctionReturn(0);
2958 }
2959 #endif
2960 
2961 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2962 {
2963   Mat_Product *product = C->product;
2964   Mat         A = product->A,B=product->B;
2965 
2966   PetscFunctionBegin;
2967   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2968     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2969   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2970   C->ops->productsymbolic = MatProductSymbolic_AtB;
2971   PetscFunctionReturn(0);
2972 }
2973 
2974 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2975 {
2976   PetscErrorCode ierr;
2977   Mat_Product    *product = C->product;
2978   const char     *algTypes[2] = {"allgatherv","cyclic"};
2979   PetscInt       alg,nalg = 2;
2980   PetscBool      flg = PETSC_FALSE;
2981 
2982   PetscFunctionBegin;
2983   /* Set default algorithm */
2984   alg = 0; /* default is allgatherv */
2985   ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
2986   if (flg) {
2987     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
2988   }
2989 
2990   /* Get runtime option */
2991   if (product->api_user) {
2992     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
2993     ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
2994     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2995   } else {
2996     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
2997     ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
2998     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2999   }
3000   if (flg) {
3001     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
3002   }
3003 
3004   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
3005   C->ops->productsymbolic          = MatProductSymbolic_ABt;
3006   PetscFunctionReturn(0);
3007 }
3008 
3009 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
3010 {
3011   PetscErrorCode ierr;
3012   Mat_Product    *product = C->product;
3013 
3014   PetscFunctionBegin;
3015   switch (product->type) {
3016 #if defined(PETSC_HAVE_ELEMENTAL)
3017   case MATPRODUCT_AB:
3018     ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
3019     break;
3020 #endif
3021   case MATPRODUCT_AtB:
3022     ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
3023     break;
3024   case MATPRODUCT_ABt:
3025     ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
3026     break;
3027   default:
3028     break;
3029   }
3030   PetscFunctionReturn(0);
3031 }
3032