xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision f2e2784ee781ab53297b5e7ed5f71c2be92534c1)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11 
12       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
13               matrix that represents the operator. For sequential matrices it returns itself.
14 
15     Input Parameter:
16 .      A - the Seq or MPI dense matrix
17 
18     Output Parameter:
19 .      B - the inner matrix
20 
21     Level: intermediate
22 
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25 {
26   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
27   PetscErrorCode ierr;
28   PetscBool      flg;
29 
30   PetscFunctionBegin;
31   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
32   PetscValidPointer(B,2);
33   ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
34   if (flg) *B = mat->A;
35   else {
36     ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg);CHKERRQ(ierr);
37     if (!flg) SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
38     *B = A;
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
44 {
45   Mat_MPIDense   *Amat = (Mat_MPIDense*)A->data;
46   Mat_MPIDense   *Bmat = (Mat_MPIDense*)B->data;
47   PetscErrorCode ierr;
48 
49   PetscFunctionBegin;
50   ierr = MatCopy(Amat->A,Bmat->A,s);CHKERRQ(ierr);
51   PetscFunctionReturn(0);
52 }
53 
54 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
55 {
56   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
57   PetscErrorCode ierr;
58   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
59 
60   PetscFunctionBegin;
61   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
62   lrow = row - rstart;
63   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
64   PetscFunctionReturn(0);
65 }
66 
67 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
68 {
69   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
70   PetscErrorCode ierr;
71   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
72 
73   PetscFunctionBegin;
74   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
75   lrow = row - rstart;
76   ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
77   PetscFunctionReturn(0);
78 }
79 
80 PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
81 {
82   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
83   PetscErrorCode ierr;
84   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
85   PetscScalar    *array;
86   MPI_Comm       comm;
87   PetscBool      flg;
88   Mat            B;
89 
90   PetscFunctionBegin;
91   ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr);
92   if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
93   ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
94   if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
95 
96     ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr);
97     if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
98     ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
99     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
100     ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
101     ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
102     ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
103     ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
104     ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
105     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
106     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
107     ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
108     *a   = B;
109     ierr = MatDestroy(&B);CHKERRQ(ierr);
110   } else *a = B;
111   PetscFunctionReturn(0);
112 }
113 
114 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
115 {
116   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
117   PetscErrorCode ierr;
118   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
119   PetscBool      roworiented = A->roworiented;
120 
121   PetscFunctionBegin;
122   for (i=0; i<m; i++) {
123     if (idxm[i] < 0) continue;
124     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
125     if (idxm[i] >= rstart && idxm[i] < rend) {
126       row = idxm[i] - rstart;
127       if (roworiented) {
128         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
129       } else {
130         for (j=0; j<n; j++) {
131           if (idxn[j] < 0) continue;
132           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
133           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
134         }
135       }
136     } else if (!A->donotstash) {
137       mat->assembled = PETSC_FALSE;
138       if (roworiented) {
139         ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
140       } else {
141         ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
142       }
143     }
144   }
145   PetscFunctionReturn(0);
146 }
147 
148 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
149 {
150   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
151   PetscErrorCode ierr;
152   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
153 
154   PetscFunctionBegin;
155   for (i=0; i<m; i++) {
156     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
157     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
158     if (idxm[i] >= rstart && idxm[i] < rend) {
159       row = idxm[i] - rstart;
160       for (j=0; j<n; j++) {
161         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
162         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
163         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
164       }
165     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
166   }
167   PetscFunctionReturn(0);
168 }
169 
170 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
171 {
172   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
173   PetscErrorCode ierr;
174 
175   PetscFunctionBegin;
176   ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
177   PetscFunctionReturn(0);
178 }
179 
180 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
181 {
182   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
183   PetscBool      iscuda;
184   PetscErrorCode ierr;
185 
186   PetscFunctionBegin;
187   if (!a->A) {
188     if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
189     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
190     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
191     ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
192     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->A);CHKERRQ(ierr);
193     ierr = MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
194     ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
195     ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
196   }
197   ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr);
198   PetscFunctionReturn(0);
199 }
200 
201 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
202 {
203   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
204   PetscErrorCode ierr;
205 
206   PetscFunctionBegin;
207   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
208   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
209   PetscFunctionReturn(0);
210 }
211 
212 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
213 {
214   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
215   PetscErrorCode ierr;
216 
217   PetscFunctionBegin;
218   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
219   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
220   PetscFunctionReturn(0);
221 }
222 
223 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
224 {
225   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
226   PetscErrorCode ierr;
227 
228   PetscFunctionBegin;
229   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
230   ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr);
231   PetscFunctionReturn(0);
232 }
233 
234 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
235 {
236   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
237   PetscErrorCode ierr;
238 
239   PetscFunctionBegin;
240   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
241   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
242   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
243   PetscFunctionReturn(0);
244 }
245 
246 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
247 {
248   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
249   PetscErrorCode ierr;
250 
251   PetscFunctionBegin;
252   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
253   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
254   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
255   PetscFunctionReturn(0);
256 }
257 
258 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
259 {
260   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
261   PetscErrorCode ierr;
262 
263   PetscFunctionBegin;
264   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
265   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
266   ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr);
267   PetscFunctionReturn(0);
268 }
269 
270 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
271 {
272   Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
273   PetscErrorCode    ierr;
274   PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
275   const PetscInt    *irow,*icol;
276   const PetscScalar *v;
277   PetscScalar       *bv;
278   Mat               newmat;
279   IS                iscol_local;
280   MPI_Comm          comm_is,comm_mat;
281 
282   PetscFunctionBegin;
283   ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
284   ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
285   if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");
286 
287   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
288   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
289   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
290   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
291   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
292   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
293 
294   /* No parallel redistribution currently supported! Should really check each index set
295      to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
296      original matrix! */
297 
298   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
299   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
300 
301   /* Check submatrix call */
302   if (scall == MAT_REUSE_MATRIX) {
303     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
304     /* Really need to test rows and column sizes! */
305     newmat = *B;
306   } else {
307     /* Create and fill new matrix */
308     ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
309     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
310     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
311     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
312   }
313 
314   /* Now extract the data pointers and do the copy, column at a time */
315   newmatd = (Mat_MPIDense*)newmat->data;
316   ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr);
317   ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr);
318   ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr);
319   for (i=0; i<Ncols; i++) {
320     const PetscScalar *av = v + lda*icol[i];
321     for (j=0; j<nrows; j++) {
322       *bv++ = av[irow[j] - rstart];
323     }
324   }
325   ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr);
326   ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr);
327 
328   /* Assemble the matrices so that the correct flags are set */
329   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
330   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
331 
332   /* Free work space */
333   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
334   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
335   ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
336   *B   = newmat;
337   PetscFunctionReturn(0);
338 }
339 
340 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
341 {
342   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
343   PetscErrorCode ierr;
344 
345   PetscFunctionBegin;
346   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
347   PetscFunctionReturn(0);
348 }
349 
350 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
351 {
352   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
353   PetscErrorCode ierr;
354 
355   PetscFunctionBegin;
356   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
357   PetscFunctionReturn(0);
358 }
359 
360 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
361 {
362   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
363   PetscErrorCode ierr;
364 
365   PetscFunctionBegin;
366   ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr);
367   PetscFunctionReturn(0);
368 }
369 
370 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
371 {
372   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
373   PetscErrorCode ierr;
374   PetscInt       nstash,reallocs;
375 
376   PetscFunctionBegin;
377   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
378 
379   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
380   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
381   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
382   PetscFunctionReturn(0);
383 }
384 
385 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
386 {
387   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
388   PetscErrorCode ierr;
389   PetscInt       i,*row,*col,flg,j,rstart,ncols;
390   PetscMPIInt    n;
391   PetscScalar    *val;
392 
393   PetscFunctionBegin;
394   if (!mdn->donotstash && !mat->nooffprocentries) {
395     /*  wait on receives */
396     while (1) {
397       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
398       if (!flg) break;
399 
400       for (i=0; i<n;) {
401         /* Now identify the consecutive vals belonging to the same row */
402         for (j=i,rstart=row[j]; j<n; j++) {
403           if (row[j] != rstart) break;
404         }
405         if (j < n) ncols = j-i;
406         else       ncols = n-i;
407         /* Now assemble all these values with a single function call */
408         ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
409         i    = j;
410       }
411     }
412     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
413   }
414 
415   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
416   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
417 
418   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
419     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
420   }
421   PetscFunctionReturn(0);
422 }
423 
424 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
425 {
426   PetscErrorCode ierr;
427   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
428 
429   PetscFunctionBegin;
430   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
431   PetscFunctionReturn(0);
432 }
433 
434 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
435 {
436   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
437   PetscErrorCode    ierr;
438   PetscInt          i,len,*lrows;
439 
440   PetscFunctionBegin;
441   /* get locally owned rows */
442   ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr);
443   /* fix right hand side if needed */
444   if (x && b) {
445     const PetscScalar *xx;
446     PetscScalar       *bb;
447 
448     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
449     ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr);
450     for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
451     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
452     ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr);
453   }
454   ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
455   if (diag != 0.0) {
456     Vec d;
457 
458     ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
459     ierr = VecSet(d,diag);CHKERRQ(ierr);
460     ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr);
461     ierr = VecDestroy(&d);CHKERRQ(ierr);
462   }
463   ierr = PetscFree(lrows);CHKERRQ(ierr);
464   PetscFunctionReturn(0);
465 }
466 
467 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
468 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
469 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
470 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
471 
472 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
473 {
474   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
475   PetscErrorCode    ierr;
476   const PetscScalar *ax;
477   PetscScalar       *ay;
478   PetscMemType      axmtype,aymtype;
479 
480   PetscFunctionBegin;
481   ierr = VecGetArrayReadAndMemType(xx,&ax,&axmtype);CHKERRQ(ierr);
482   ierr = VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr);
483   ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay);CHKERRQ(ierr);
484   ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
485   ierr = VecRestoreArrayAndMemType(mdn->lvec,&ay);CHKERRQ(ierr);
486   ierr = VecRestoreArrayReadAndMemType(xx,&ax);CHKERRQ(ierr);
487   ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
488   PetscFunctionReturn(0);
489 }
490 
491 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
492 {
493   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
494   PetscErrorCode    ierr;
495   const PetscScalar *ax;
496   PetscScalar       *ay;
497   PetscMemType      axmtype,aymtype;
498 
499   PetscFunctionBegin;
500   ierr = VecGetArrayReadAndMemType(xx,&ax,&axmtype);CHKERRQ(ierr);
501   ierr = VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr);
502   ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay);CHKERRQ(ierr);
503   ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
504   ierr = VecRestoreArrayAndMemType(mdn->lvec,&ay);CHKERRQ(ierr);
505   ierr = VecRestoreArrayReadAndMemType(xx,&ax);CHKERRQ(ierr);
506   ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
507   PetscFunctionReturn(0);
508 }
509 
510 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
511 {
512   Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
513   PetscErrorCode    ierr;
514   const PetscScalar *ax;
515   PetscScalar       *ay;
516   PetscMemType      axmtype,aymtype;
517 
518   PetscFunctionBegin;
519   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
520   ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
521   ierr = VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);CHKERRQ(ierr);
522   ierr = VecGetArrayAndMemType(yy,&ay,&aymtype);CHKERRQ(ierr);
523   ierr = PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr);
524   ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
525   ierr = VecRestoreArrayReadAndMemType(a->lvec,&ax);CHKERRQ(ierr);
526   ierr = VecRestoreArrayAndMemType(yy,&ay);CHKERRQ(ierr);
527   PetscFunctionReturn(0);
528 }
529 
530 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
531 {
532   Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
533   PetscErrorCode    ierr;
534   const PetscScalar *ax;
535   PetscScalar       *ay;
536   PetscMemType      axmtype,aymtype;
537 
538   PetscFunctionBegin;
539   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
540   ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
541   ierr = VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);CHKERRQ(ierr);
542   ierr = VecGetArrayAndMemType(zz,&ay,&aymtype);CHKERRQ(ierr);
543   ierr = PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr);
544   ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
545   ierr = VecRestoreArrayReadAndMemType(a->lvec,&ax);CHKERRQ(ierr);
546   ierr = VecRestoreArrayAndMemType(zz,&ay);CHKERRQ(ierr);
547   PetscFunctionReturn(0);
548 }
549 
550 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
551 {
552   Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
553   PetscErrorCode    ierr;
554   PetscInt          lda,len,i,n,m = A->rmap->n,radd;
555   PetscScalar       *x,zero = 0.0;
556   const PetscScalar *av;
557 
558   PetscFunctionBegin;
559   ierr = VecSet(v,zero);CHKERRQ(ierr);
560   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
561   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
562   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
563   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
564   radd = A->rmap->rstart*m;
565   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
566   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
567   for (i=0; i<len; i++) {
568     x[i] = av[radd + i*lda + i];
569   }
570   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
571   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
572   PetscFunctionReturn(0);
573 }
574 
575 PetscErrorCode MatDestroy_MPIDense(Mat mat)
576 {
577   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
578   PetscErrorCode ierr;
579 
580   PetscFunctionBegin;
581 #if defined(PETSC_USE_LOG)
582   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
583 #endif
584   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
585   if (mdn->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
586   if (mdn->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
587   ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
588   ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
589   ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr);
590   ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr);
591   ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr);
592 
593   ierr = PetscFree(mat->data);CHKERRQ(ierr);
594   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
595 
596   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
597   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr);
598   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
599   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
600   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
601   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
602   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr);
603   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr);
604   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
605   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
606   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr);
607 #if defined(PETSC_HAVE_ELEMENTAL)
608   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
609 #endif
610 #if defined(PETSC_HAVE_SCALAPACK)
611   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL);CHKERRQ(ierr);
612 #endif
613   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
614   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
615   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
616 #if defined (PETSC_HAVE_CUDA)
617   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr);
618   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
619 #endif
620   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
621   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
622   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr);
623   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr);
624   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr);
625   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr);
626   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr);
627   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr);
628   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr);
629   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr);
630   PetscFunctionReturn(0);
631 }
632 
633 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
634 
635 #include <petscdraw.h>
636 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
637 {
638   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
639   PetscErrorCode    ierr;
640   PetscMPIInt       rank;
641   PetscViewerType   vtype;
642   PetscBool         iascii,isdraw;
643   PetscViewer       sviewer;
644   PetscViewerFormat format;
645 
646   PetscFunctionBegin;
647   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
648   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
649   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
650   if (iascii) {
651     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
652     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
653     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
654       MatInfo info;
655       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
656       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
657       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
658       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
659       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
660       ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
661       PetscFunctionReturn(0);
662     } else if (format == PETSC_VIEWER_ASCII_INFO) {
663       PetscFunctionReturn(0);
664     }
665   } else if (isdraw) {
666     PetscDraw draw;
667     PetscBool isnull;
668 
669     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
670     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
671     if (isnull) PetscFunctionReturn(0);
672   }
673 
674   {
675     /* assemble the entire matrix onto first processor. */
676     Mat         A;
677     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
678     PetscInt    *cols;
679     PetscScalar *vals;
680 
681     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
682     if (!rank) {
683       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
684     } else {
685       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
686     }
687     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
688     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
689     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
690     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
691 
692     /* Copy the matrix ... This isn't the most efficient means,
693        but it's quick for now */
694     A->insertmode = INSERT_VALUES;
695 
696     row = mat->rmap->rstart;
697     m   = mdn->A->rmap->n;
698     for (i=0; i<m; i++) {
699       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
700       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
701       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
702       row++;
703     }
704 
705     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
706     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
707     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
708     if (!rank) {
709       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
710       ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
711     }
712     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
713     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
714     ierr = MatDestroy(&A);CHKERRQ(ierr);
715   }
716   PetscFunctionReturn(0);
717 }
718 
719 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
720 {
721   PetscErrorCode ierr;
722   PetscBool      iascii,isbinary,isdraw,issocket;
723 
724   PetscFunctionBegin;
725   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
726   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
727   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
728   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
729 
730   if (iascii || issocket || isdraw) {
731     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
732   } else if (isbinary) {
733     ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
734   }
735   PetscFunctionReturn(0);
736 }
737 
738 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
739 {
740   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
741   Mat            mdn  = mat->A;
742   PetscErrorCode ierr;
743   PetscLogDouble isend[5],irecv[5];
744 
745   PetscFunctionBegin;
746   info->block_size = 1.0;
747 
748   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
749 
750   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
751   isend[3] = info->memory;  isend[4] = info->mallocs;
752   if (flag == MAT_LOCAL) {
753     info->nz_used      = isend[0];
754     info->nz_allocated = isend[1];
755     info->nz_unneeded  = isend[2];
756     info->memory       = isend[3];
757     info->mallocs      = isend[4];
758   } else if (flag == MAT_GLOBAL_MAX) {
759     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
760 
761     info->nz_used      = irecv[0];
762     info->nz_allocated = irecv[1];
763     info->nz_unneeded  = irecv[2];
764     info->memory       = irecv[3];
765     info->mallocs      = irecv[4];
766   } else if (flag == MAT_GLOBAL_SUM) {
767     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
768 
769     info->nz_used      = irecv[0];
770     info->nz_allocated = irecv[1];
771     info->nz_unneeded  = irecv[2];
772     info->memory       = irecv[3];
773     info->mallocs      = irecv[4];
774   }
775   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
776   info->fill_ratio_needed = 0;
777   info->factor_mallocs    = 0;
778   PetscFunctionReturn(0);
779 }
780 
781 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
782 {
783   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
784   PetscErrorCode ierr;
785 
786   PetscFunctionBegin;
787   switch (op) {
788   case MAT_NEW_NONZERO_LOCATIONS:
789   case MAT_NEW_NONZERO_LOCATION_ERR:
790   case MAT_NEW_NONZERO_ALLOCATION_ERR:
791     MatCheckPreallocated(A,1);
792     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
793     break;
794   case MAT_ROW_ORIENTED:
795     MatCheckPreallocated(A,1);
796     a->roworiented = flg;
797     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
798     break;
799   case MAT_FORCE_DIAGONAL_ENTRIES:
800   case MAT_KEEP_NONZERO_PATTERN:
801   case MAT_USE_HASH_TABLE:
802   case MAT_SORTED_FULL:
803     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
804     break;
805   case MAT_IGNORE_OFF_PROC_ENTRIES:
806     a->donotstash = flg;
807     break;
808   case MAT_SYMMETRIC:
809   case MAT_STRUCTURALLY_SYMMETRIC:
810   case MAT_HERMITIAN:
811   case MAT_SYMMETRY_ETERNAL:
812   case MAT_IGNORE_LOWER_TRIANGULAR:
813   case MAT_IGNORE_ZERO_ENTRIES:
814     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
815     break;
816   default:
817     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
818   }
819   PetscFunctionReturn(0);
820 }
821 
822 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
823 {
824   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
825   const PetscScalar *l;
826   PetscScalar       x,*v,*vv,*r;
827   PetscErrorCode    ierr;
828   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;
829 
830   PetscFunctionBegin;
831   ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
832   ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
833   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
834   if (ll) {
835     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
836     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2);
837     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
838     for (i=0; i<m; i++) {
839       x = l[i];
840       v = vv + i;
841       for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
842     }
843     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
844     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
845   }
846   if (rr) {
847     const PetscScalar *ar;
848 
849     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
850     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
851     ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
852     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
853     ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
854     ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
855     ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr);
856     for (i=0; i<n; i++) {
857       x = r[i];
858       v = vv + i*lda;
859       for (j=0; j<m; j++) (*v++) *= x;
860     }
861     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
862     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
863   }
864   ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr);
865   PetscFunctionReturn(0);
866 }
867 
868 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
869 {
870   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
871   PetscErrorCode    ierr;
872   PetscInt          i,j;
873   PetscMPIInt       size;
874   PetscReal         sum = 0.0;
875   const PetscScalar *av,*v;
876 
877   PetscFunctionBegin;
878   ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr);
879   v    = av;
880   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
881   if (size == 1) {
882     ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
883   } else {
884     if (type == NORM_FROBENIUS) {
885       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
886         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
887       }
888       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
889       *nrm = PetscSqrtReal(*nrm);
890       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
891     } else if (type == NORM_1) {
892       PetscReal *tmp,*tmp2;
893       ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
894       *nrm = 0.0;
895       v    = av;
896       for (j=0; j<mdn->A->cmap->n; j++) {
897         for (i=0; i<mdn->A->rmap->n; i++) {
898           tmp[j] += PetscAbsScalar(*v);  v++;
899         }
900       }
901       ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
902       for (j=0; j<A->cmap->N; j++) {
903         if (tmp2[j] > *nrm) *nrm = tmp2[j];
904       }
905       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
906       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
907     } else if (type == NORM_INFINITY) { /* max row norm */
908       PetscReal ntemp;
909       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
910       ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
911     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
912   }
913   ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
914   PetscFunctionReturn(0);
915 }
916 
917 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
918 {
919   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
920   Mat            B;
921   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
922   PetscErrorCode ierr;
923   PetscInt       j,i,lda;
924   PetscScalar    *v;
925 
926   PetscFunctionBegin;
927   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
928     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
929     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
930     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
931     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
932   } else B = *matout;
933 
934   m    = a->A->rmap->n; n = a->A->cmap->n;
935   ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
936   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
937   ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
938   for (i=0; i<m; i++) rwork[i] = rstart + i;
939   for (j=0; j<n; j++) {
940     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
941     v   += lda;
942   }
943   ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
944   ierr = PetscFree(rwork);CHKERRQ(ierr);
945   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
946   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
947   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
948     *matout = B;
949   } else {
950     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
951   }
952   PetscFunctionReturn(0);
953 }
954 
955 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
956 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
957 
958 PetscErrorCode MatSetUp_MPIDense(Mat A)
959 {
960   PetscErrorCode ierr;
961 
962   PetscFunctionBegin;
963   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
964   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
965   if (!A->preallocated) {
966     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
967   }
968   PetscFunctionReturn(0);
969 }
970 
971 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
972 {
973   PetscErrorCode ierr;
974   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
975 
976   PetscFunctionBegin;
977   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
978   PetscFunctionReturn(0);
979 }
980 
981 PetscErrorCode MatConjugate_MPIDense(Mat mat)
982 {
983   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
984   PetscErrorCode ierr;
985 
986   PetscFunctionBegin;
987   ierr = MatConjugate(a->A);CHKERRQ(ierr);
988   PetscFunctionReturn(0);
989 }
990 
991 PetscErrorCode MatRealPart_MPIDense(Mat A)
992 {
993   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
994   PetscErrorCode ierr;
995 
996   PetscFunctionBegin;
997   ierr = MatRealPart(a->A);CHKERRQ(ierr);
998   PetscFunctionReturn(0);
999 }
1000 
1001 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1002 {
1003   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1004   PetscErrorCode ierr;
1005 
1006   PetscFunctionBegin;
1007   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1008   PetscFunctionReturn(0);
1009 }
1010 
1011 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
1012 {
1013   PetscErrorCode ierr;
1014   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1015 
1016   PetscFunctionBegin;
1017   if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
1018   if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
1019   ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
1020   PetscFunctionReturn(0);
1021 }
1022 
1023 PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1024 
1025 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1026 {
1027   PetscErrorCode ierr;
1028   PetscInt       i,n;
1029   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1030   PetscReal      *work;
1031 
1032   PetscFunctionBegin;
1033   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1034   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1035   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1036   if (type == NORM_2) {
1037     for (i=0; i<n; i++) work[i] *= work[i];
1038   }
1039   if (type == NORM_INFINITY) {
1040     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1041   } else {
1042     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1043   }
1044   ierr = PetscFree(work);CHKERRQ(ierr);
1045   if (type == NORM_2) {
1046     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1047   }
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 #if defined(PETSC_HAVE_CUDA)
1052 static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1053 {
1054   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1055   PetscErrorCode ierr;
1056   PetscInt       lda;
1057 
1058   PetscFunctionBegin;
1059   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1060   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1061   if (!a->cvec) {
1062     ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1063     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
1064   }
1065   a->vecinuse = col + 1;
1066   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1067   ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1068   ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1069   *v   = a->cvec;
1070   PetscFunctionReturn(0);
1071 }
1072 
1073 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1074 {
1075   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1076   PetscErrorCode ierr;
1077 
1078   PetscFunctionBegin;
1079   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1080   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1081   a->vecinuse = 0;
1082   ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1083   ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
1084   *v   = NULL;
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1089 {
1090   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1091   PetscInt       lda;
1092   PetscErrorCode ierr;
1093 
1094   PetscFunctionBegin;
1095   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1096   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1097   if (!a->cvec) {
1098     ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1099     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
1100   }
1101   a->vecinuse = col + 1;
1102   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1103   ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1104   ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1105   ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
1106   *v   = a->cvec;
1107   PetscFunctionReturn(0);
1108 }
1109 
1110 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1111 {
1112   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1113   PetscErrorCode ierr;
1114 
1115   PetscFunctionBegin;
1116   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1117   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1118   a->vecinuse = 0;
1119   ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1120   ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
1121   ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
1122   *v   = NULL;
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1127 {
1128   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1129   PetscErrorCode ierr;
1130   PetscInt       lda;
1131 
1132   PetscFunctionBegin;
1133   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1134   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1135   if (!a->cvec) {
1136     ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1137     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
1138   }
1139   a->vecinuse = col + 1;
1140   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1141   ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1142   ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1143   *v   = a->cvec;
1144   PetscFunctionReturn(0);
1145 }
1146 
1147 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1148 {
1149   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1150   PetscErrorCode ierr;
1151 
1152   PetscFunctionBegin;
1153   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1154   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1155   a->vecinuse = 0;
1156   ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1157   ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
1158   *v   = NULL;
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1163 {
1164   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1165   PetscErrorCode ierr;
1166 
1167   PetscFunctionBegin;
1168   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1169   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1170   ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
1171   PetscFunctionReturn(0);
1172 }
1173 
1174 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1175 {
1176   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1177   PetscErrorCode ierr;
1178 
1179   PetscFunctionBegin;
1180   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1181   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1182   ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
1183   PetscFunctionReturn(0);
1184 }
1185 
1186 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1187 {
1188   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1189   PetscErrorCode ierr;
1190 
1191   PetscFunctionBegin;
1192   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1193   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1194   ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr);
1195   PetscFunctionReturn(0);
1196 }
1197 
1198 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1199 {
1200   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1201   PetscErrorCode ierr;
1202 
1203   PetscFunctionBegin;
1204   ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
1205   PetscFunctionReturn(0);
1206 }
1207 
1208 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1209 {
1210   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1211   PetscErrorCode ierr;
1212 
1213   PetscFunctionBegin;
1214   ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1219 {
1220   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1221   PetscErrorCode ierr;
1222 
1223   PetscFunctionBegin;
1224   ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1229 {
1230   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1231   PetscErrorCode ierr;
1232 
1233   PetscFunctionBegin;
1234   ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1239 {
1240   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1241   PetscErrorCode ierr;
1242 
1243   PetscFunctionBegin;
1244   ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
1245   PetscFunctionReturn(0);
1246 }
1247 
1248 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1249 {
1250   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1251   PetscErrorCode ierr;
1252 
1253   PetscFunctionBegin;
1254   ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1259 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1260 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
1261 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1262 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1263 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
1264 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);
1265 
1266 static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
1267 {
1268   Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
1269   PetscErrorCode ierr;
1270 
1271   PetscFunctionBegin;
1272   if (d->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1273   if (d->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1274   if (d->A) {
1275     ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
1276   }
1277   mat->boundtocpu = bind;
1278   if (!bind) {
1279     PetscBool iscuda;
1280 
1281     ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
1282     if (!iscuda) {
1283       ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
1284     }
1285     ierr = PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
1286     if (!iscuda) {
1287       ierr = MatDestroy(&d->cmat);CHKERRQ(ierr);
1288     }
1289     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
1290     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
1291     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
1292     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
1293     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
1294     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
1295   } else {
1296     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
1297     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
1298     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
1299     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
1300     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
1301     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
1302   }
1303   if (d->cmat) {
1304     ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr);
1305   }
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
1310 {
1311   Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
1312   PetscErrorCode ierr;
1313   PetscBool      iscuda;
1314 
1315   PetscFunctionBegin;
1316   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1317   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
1318   if (!iscuda) PetscFunctionReturn(0);
1319   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
1320   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
1321   if (!d->A) {
1322     ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
1323     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
1324     ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
1325   }
1326   ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
1327   ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
1328   A->preallocated = PETSC_TRUE;
1329   PetscFunctionReturn(0);
1330 }
1331 #endif
1332 
1333 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1334 {
1335   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1336   PetscErrorCode ierr;
1337 
1338   PetscFunctionBegin;
1339   ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
1340   PetscFunctionReturn(0);
1341 }
1342 
1343 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
1344 {
1345   PetscFunctionBegin;
1346   *missing = PETSC_FALSE;
1347   PetscFunctionReturn(0);
1348 }
1349 
1350 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1351 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1352 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1353 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1354 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
1355 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);
1356 
1357 /* -------------------------------------------------------------------*/
1358 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1359                                         MatGetRow_MPIDense,
1360                                         MatRestoreRow_MPIDense,
1361                                         MatMult_MPIDense,
1362                                 /*  4*/ MatMultAdd_MPIDense,
1363                                         MatMultTranspose_MPIDense,
1364                                         MatMultTransposeAdd_MPIDense,
1365                                         NULL,
1366                                         NULL,
1367                                         NULL,
1368                                 /* 10*/ NULL,
1369                                         NULL,
1370                                         NULL,
1371                                         NULL,
1372                                         MatTranspose_MPIDense,
1373                                 /* 15*/ MatGetInfo_MPIDense,
1374                                         MatEqual_MPIDense,
1375                                         MatGetDiagonal_MPIDense,
1376                                         MatDiagonalScale_MPIDense,
1377                                         MatNorm_MPIDense,
1378                                 /* 20*/ MatAssemblyBegin_MPIDense,
1379                                         MatAssemblyEnd_MPIDense,
1380                                         MatSetOption_MPIDense,
1381                                         MatZeroEntries_MPIDense,
1382                                 /* 24*/ MatZeroRows_MPIDense,
1383                                         NULL,
1384                                         NULL,
1385                                         NULL,
1386                                         NULL,
1387                                 /* 29*/ MatSetUp_MPIDense,
1388                                         NULL,
1389                                         NULL,
1390                                         MatGetDiagonalBlock_MPIDense,
1391                                         NULL,
1392                                 /* 34*/ MatDuplicate_MPIDense,
1393                                         NULL,
1394                                         NULL,
1395                                         NULL,
1396                                         NULL,
1397                                 /* 39*/ MatAXPY_MPIDense,
1398                                         MatCreateSubMatrices_MPIDense,
1399                                         NULL,
1400                                         MatGetValues_MPIDense,
1401                                         MatCopy_MPIDense,
1402                                 /* 44*/ NULL,
1403                                         MatScale_MPIDense,
1404                                         MatShift_Basic,
1405                                         NULL,
1406                                         NULL,
1407                                 /* 49*/ MatSetRandom_MPIDense,
1408                                         NULL,
1409                                         NULL,
1410                                         NULL,
1411                                         NULL,
1412                                 /* 54*/ NULL,
1413                                         NULL,
1414                                         NULL,
1415                                         NULL,
1416                                         NULL,
1417                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1418                                         MatDestroy_MPIDense,
1419                                         MatView_MPIDense,
1420                                         NULL,
1421                                         NULL,
1422                                 /* 64*/ NULL,
1423                                         NULL,
1424                                         NULL,
1425                                         NULL,
1426                                         NULL,
1427                                 /* 69*/ NULL,
1428                                         NULL,
1429                                         NULL,
1430                                         NULL,
1431                                         NULL,
1432                                 /* 74*/ NULL,
1433                                         NULL,
1434                                         NULL,
1435                                         NULL,
1436                                         NULL,
1437                                 /* 79*/ NULL,
1438                                         NULL,
1439                                         NULL,
1440                                         NULL,
1441                                 /* 83*/ MatLoad_MPIDense,
1442                                         NULL,
1443                                         NULL,
1444                                         NULL,
1445                                         NULL,
1446                                         NULL,
1447                                 /* 89*/ NULL,
1448                                         NULL,
1449                                         NULL,
1450                                         NULL,
1451                                         NULL,
1452                                 /* 94*/ NULL,
1453                                         NULL,
1454                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1455                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1456                                         NULL,
1457                                 /* 99*/ MatProductSetFromOptions_MPIDense,
1458                                         NULL,
1459                                         NULL,
1460                                         MatConjugate_MPIDense,
1461                                         NULL,
1462                                 /*104*/ NULL,
1463                                         MatRealPart_MPIDense,
1464                                         MatImaginaryPart_MPIDense,
1465                                         NULL,
1466                                         NULL,
1467                                 /*109*/ NULL,
1468                                         NULL,
1469                                         NULL,
1470                                         MatGetColumnVector_MPIDense,
1471                                         MatMissingDiagonal_MPIDense,
1472                                 /*114*/ NULL,
1473                                         NULL,
1474                                         NULL,
1475                                         NULL,
1476                                         NULL,
1477                                 /*119*/ NULL,
1478                                         NULL,
1479                                         NULL,
1480                                         NULL,
1481                                         NULL,
1482                                 /*124*/ NULL,
1483                                         MatGetColumnNorms_MPIDense,
1484                                         NULL,
1485                                         NULL,
1486                                         NULL,
1487                                 /*129*/ NULL,
1488                                         NULL,
1489                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1490                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1491                                         NULL,
1492                                 /*134*/ NULL,
1493                                         NULL,
1494                                         NULL,
1495                                         NULL,
1496                                         NULL,
1497                                 /*139*/ NULL,
1498                                         NULL,
1499                                         NULL,
1500                                         NULL,
1501                                         NULL,
1502                                         MatCreateMPIMatConcatenateSeqMat_MPIDense,
1503                                 /*145*/ NULL,
1504                                         NULL,
1505                                         NULL
1506 };
1507 
1508 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1509 {
1510   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1511   PetscBool      iscuda;
1512   PetscErrorCode ierr;
1513 
1514   PetscFunctionBegin;
1515   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1516   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1517   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1518   if (!a->A) {
1519     ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1520     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1521     ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1522   }
1523   ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
1524   ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
1525   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1526   mat->preallocated = PETSC_TRUE;
1527   PetscFunctionReturn(0);
1528 }
1529 
1530 #if defined(PETSC_HAVE_ELEMENTAL)
1531 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1532 {
1533   Mat            mat_elemental;
1534   PetscErrorCode ierr;
1535   PetscScalar    *v;
1536   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1537 
1538   PetscFunctionBegin;
1539   if (reuse == MAT_REUSE_MATRIX) {
1540     mat_elemental = *newmat;
1541     ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
1542   } else {
1543     ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
1544     ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1545     ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
1546     ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
1547     ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1548   }
1549 
1550   ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
1551   for (i=0; i<N; i++) cols[i] = i;
1552   for (i=0; i<m; i++) rows[i] = rstart + i;
1553 
1554   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1555   ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
1556   ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
1557   ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1558   ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1559   ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
1560   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1561 
1562   if (reuse == MAT_INPLACE_MATRIX) {
1563     ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
1564   } else {
1565     *newmat = mat_elemental;
1566   }
1567   PetscFunctionReturn(0);
1568 }
1569 #endif
1570 
1571 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1572 {
1573   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1574   PetscErrorCode ierr;
1575 
1576   PetscFunctionBegin;
1577   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1578   PetscFunctionReturn(0);
1579 }
1580 
1581 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1582 {
1583   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1584   PetscErrorCode ierr;
1585 
1586   PetscFunctionBegin;
1587   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1588   PetscFunctionReturn(0);
1589 }
1590 
1591 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1592 {
1593   PetscErrorCode ierr;
1594   Mat_MPIDense   *mat;
1595   PetscInt       m,nloc,N;
1596 
1597   PetscFunctionBegin;
1598   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
1599   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
1600   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1601     PetscInt sum;
1602 
1603     if (n == PETSC_DECIDE) {
1604       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1605     }
1606     /* Check sum(n) = N */
1607     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1608     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1609 
1610     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
1611     ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
1612   }
1613 
1614   /* numeric phase */
1615   mat = (Mat_MPIDense*)(*outmat)->data;
1616   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1617   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1618   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1619   PetscFunctionReturn(0);
1620 }
1621 
1622 #if defined(PETSC_HAVE_CUDA)
1623 PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
1624 {
1625   Mat            B;
1626   Mat_MPIDense   *m;
1627   PetscErrorCode ierr;
1628 
1629   PetscFunctionBegin;
1630   if (reuse == MAT_INITIAL_MATRIX) {
1631     ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
1632   } else if (reuse == MAT_REUSE_MATRIX) {
1633     ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1634   }
1635 
1636   B    = *newmat;
1637   ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
1638   ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
1639   ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
1640   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
1641   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
1642   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
1643   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr);
1644   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
1645   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
1646   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
1647   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
1648   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
1649   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
1650   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
1651   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
1652   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
1653   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
1654   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
1655   m    = (Mat_MPIDense*)(B)->data;
1656   if (m->A) {
1657     ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
1658     ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
1659   }
1660   B->ops->bindtocpu = NULL;
1661   B->offloadmask    = PETSC_OFFLOAD_CPU;
1662   PetscFunctionReturn(0);
1663 }
1664 
1665 PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
1666 {
1667   Mat            B;
1668   Mat_MPIDense   *m;
1669   PetscErrorCode ierr;
1670 
1671   PetscFunctionBegin;
1672   if (reuse == MAT_INITIAL_MATRIX) {
1673     ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
1674   } else if (reuse == MAT_REUSE_MATRIX) {
1675     ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1676   }
1677 
1678   B    = *newmat;
1679   ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
1680   ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
1681   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
1682   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
1683   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
1684   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
1685   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
1686   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
1687   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
1688   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
1689   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
1690   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
1691   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
1692   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
1693   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
1694   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
1695   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
1696   m    = (Mat_MPIDense*)(B)->data;
1697   if (m->A) {
1698     ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
1699     ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
1700     B->offloadmask = PETSC_OFFLOAD_BOTH;
1701   } else {
1702     B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1703   }
1704   ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);
1705 
1706   B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
1707   PetscFunctionReturn(0);
1708 }
1709 #endif
1710 
1711 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1712 {
1713   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1714   PetscErrorCode ierr;
1715   PetscInt       lda;
1716 
1717   PetscFunctionBegin;
1718   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1719   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1720   if (!a->cvec) {
1721     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1722   }
1723   a->vecinuse = col + 1;
1724   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1725   ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1726   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1727   *v   = a->cvec;
1728   PetscFunctionReturn(0);
1729 }
1730 
1731 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1732 {
1733   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1734   PetscErrorCode ierr;
1735 
1736   PetscFunctionBegin;
1737   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1738   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1739   a->vecinuse = 0;
1740   ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1741   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1742   *v   = NULL;
1743   PetscFunctionReturn(0);
1744 }
1745 
1746 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1747 {
1748   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1749   PetscErrorCode ierr;
1750   PetscInt       lda;
1751 
1752   PetscFunctionBegin;
1753   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1754   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1755   if (!a->cvec) {
1756     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1757   }
1758   a->vecinuse = col + 1;
1759   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1760   ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1761   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1762   ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
1763   *v   = a->cvec;
1764   PetscFunctionReturn(0);
1765 }
1766 
1767 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1768 {
1769   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1770   PetscErrorCode ierr;
1771 
1772   PetscFunctionBegin;
1773   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1774   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1775   a->vecinuse = 0;
1776   ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1777   ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
1778   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1779   *v   = NULL;
1780   PetscFunctionReturn(0);
1781 }
1782 
1783 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1784 {
1785   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1786   PetscErrorCode ierr;
1787   PetscInt       lda;
1788 
1789   PetscFunctionBegin;
1790   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1791   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1792   if (!a->cvec) {
1793     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1794   }
1795   a->vecinuse = col + 1;
1796   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1797   ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1798   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1799   *v   = a->cvec;
1800   PetscFunctionReturn(0);
1801 }
1802 
1803 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1804 {
1805   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1806   PetscErrorCode ierr;
1807 
1808   PetscFunctionBegin;
1809   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1810   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1811   a->vecinuse = 0;
1812   ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1813   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1814   *v   = NULL;
1815   PetscFunctionReturn(0);
1816 }
1817 
1818 PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
1819 {
1820   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1821   Mat_MPIDense   *c;
1822   PetscErrorCode ierr;
1823   MPI_Comm       comm;
1824   PetscBool      setup = PETSC_FALSE;
1825 
1826   PetscFunctionBegin;
1827   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1828   if (a->vecinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1829   if (a->matinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1830   if (!a->cmat) {
1831     setup = PETSC_TRUE;
1832     ierr = MatCreate(comm,&a->cmat);CHKERRQ(ierr);
1833     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr);
1834     ierr = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1835     ierr = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr);
1836     ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
1837     ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
1838   } else if (cend-cbegin != a->cmat->cmap->N) {
1839     setup = PETSC_TRUE;
1840     ierr = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr);
1841     ierr = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr);
1842     ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
1843     ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
1844   }
1845   c = (Mat_MPIDense*)a->cmat->data;
1846   if (c->A) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1847   ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr);
1848   if (setup) { /* do we really need this? */
1849     ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr);
1850   }
1851   a->cmat->preallocated = PETSC_TRUE;
1852   a->cmat->assembled = PETSC_TRUE;
1853   a->matinuse = cbegin + 1;
1854   *v = a->cmat;
1855   PetscFunctionReturn(0);
1856 }
1857 
1858 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
1859 {
1860   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1861   Mat_MPIDense   *c;
1862   PetscErrorCode ierr;
1863 
1864   PetscFunctionBegin;
1865   if (!a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
1866   if (!a->cmat) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
1867   if (*v != a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
1868   a->matinuse = 0;
1869   c    = (Mat_MPIDense*)a->cmat->data;
1870   ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr);
1871   *v   = NULL;
1872   PetscFunctionReturn(0);
1873 }
1874 
1875 /*MC
1876    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1877 
1878    Options Database Keys:
1879 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1880 
1881   Level: beginner
1882 
1883 .seealso: MatCreateDense()
1884 
1885 M*/
1886 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1887 {
1888   Mat_MPIDense   *a;
1889   PetscErrorCode ierr;
1890 
1891   PetscFunctionBegin;
1892   ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
1893   mat->data = (void*)a;
1894   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1895 
1896   mat->insertmode = NOT_SET_VALUES;
1897 
1898   /* build cache for off array entries formed */
1899   a->donotstash = PETSC_FALSE;
1900 
1901   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1902 
1903   /* stuff used for matrix vector multiply */
1904   a->lvec        = NULL;
1905   a->Mvctx       = NULL;
1906   a->roworiented = PETSC_TRUE;
1907 
1908   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
1909   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr);
1910   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1911   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1912   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
1913   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
1914   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
1915   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
1916   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
1917   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
1918   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
1919   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
1920   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
1921   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
1922   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
1923   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
1924   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
1925   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr);
1926   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr);
1927 #if defined(PETSC_HAVE_ELEMENTAL)
1928   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
1929 #endif
1930 #if defined(PETSC_HAVE_SCALAPACK)
1931   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK);CHKERRQ(ierr);
1932 #endif
1933 #if defined(PETSC_HAVE_CUDA)
1934   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
1935 #endif
1936   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1937   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
1938   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
1939 #if defined(PETSC_HAVE_CUDA)
1940   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
1941   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
1942 #endif
1943 
1944   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
1945   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
1946   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1947   PetscFunctionReturn(0);
1948 }
1949 
1950 /*MC
1951    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1952 
1953    Options Database Keys:
1954 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1955 
1956   Level: beginner
1957 
1958 .seealso:
1959 
1960 M*/
1961 #if defined(PETSC_HAVE_CUDA)
1962 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
1963 {
1964   PetscErrorCode ierr;
1965 
1966   PetscFunctionBegin;
1967   ierr = PetscCUDAInitializeCheck();CHKERRQ(ierr);
1968   ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
1969   ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
1970   PetscFunctionReturn(0);
1971 }
1972 #endif
1973 
1974 /*MC
1975    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1976 
1977    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1978    and MATMPIDENSE otherwise.
1979 
1980    Options Database Keys:
1981 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1982 
1983   Level: beginner
1984 
1985 
1986 .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
1987 M*/
1988 
1989 /*MC
1990    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
1991 
1992    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
1993    and MATMPIDENSECUDA otherwise.
1994 
1995    Options Database Keys:
1996 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
1997 
1998   Level: beginner
1999 
2000 .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
2001 M*/
2002 
2003 /*@C
2004    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
2005 
2006    Collective
2007 
2008    Input Parameters:
2009 .  B - the matrix
2010 -  data - optional location of matrix data.  Set data=NULL for PETSc
2011    to control all matrix memory allocation.
2012 
2013    Notes:
2014    The dense format is fully compatible with standard Fortran 77
2015    storage by columns.
2016 
2017    The data input variable is intended primarily for Fortran programmers
2018    who wish to allocate their own matrix memory space.  Most users should
2019    set data=NULL.
2020 
2021    Level: intermediate
2022 
2023 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
2024 @*/
2025 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
2026 {
2027   PetscErrorCode ierr;
2028 
2029   PetscFunctionBegin;
2030   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2031   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
2032   PetscFunctionReturn(0);
2033 }
2034 
2035 /*@
2036    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
2037    array provided by the user. This is useful to avoid copying an array
2038    into a matrix
2039 
2040    Not Collective
2041 
2042    Input Parameters:
2043 +  mat - the matrix
2044 -  array - the array in column major order
2045 
2046    Notes:
2047    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
2048    freed when the matrix is destroyed.
2049 
2050    Level: developer
2051 
2052 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
2053 
2054 @*/
2055 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
2056 {
2057   PetscErrorCode ierr;
2058 
2059   PetscFunctionBegin;
2060   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2061   ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2062   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2063 #if defined(PETSC_HAVE_CUDA)
2064   mat->offloadmask = PETSC_OFFLOAD_CPU;
2065 #endif
2066   PetscFunctionReturn(0);
2067 }
2068 
2069 /*@
2070    MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()
2071 
2072    Not Collective
2073 
2074    Input Parameters:
2075 .  mat - the matrix
2076 
2077    Notes:
2078    You can only call this after a call to MatDensePlaceArray()
2079 
2080    Level: developer
2081 
2082 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
2083 
2084 @*/
2085 PetscErrorCode  MatDenseResetArray(Mat mat)
2086 {
2087   PetscErrorCode ierr;
2088 
2089   PetscFunctionBegin;
2090   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2091   ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
2092   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2093   PetscFunctionReturn(0);
2094 }
2095 
2096 /*@
2097    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2098    array provided by the user. This is useful to avoid copying an array
2099    into a matrix
2100 
2101    Not Collective
2102 
2103    Input Parameters:
2104 +  mat - the matrix
2105 -  array - the array in column major order
2106 
2107    Notes:
2108    The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
2109    freed by the user. It will be freed when the matrix is destroyed.
2110 
2111    Level: developer
2112 
2113 .seealso: MatDenseGetArray(), VecReplaceArray()
2114 @*/
2115 PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
2116 {
2117   PetscErrorCode ierr;
2118 
2119   PetscFunctionBegin;
2120   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2121   ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2122   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2123 #if defined(PETSC_HAVE_CUDA)
2124   mat->offloadmask = PETSC_OFFLOAD_CPU;
2125 #endif
2126   PetscFunctionReturn(0);
2127 }
2128 
2129 #if defined(PETSC_HAVE_CUDA)
2130 /*@C
2131    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
2132    array provided by the user. This is useful to avoid copying an array
2133    into a matrix
2134 
2135    Not Collective
2136 
2137    Input Parameters:
2138 +  mat - the matrix
2139 -  array - the array in column major order
2140 
2141    Notes:
2142    You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
2143    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
2144 
2145    Level: developer
2146 
2147 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
2148 @*/
2149 PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
2150 {
2151   PetscErrorCode ierr;
2152 
2153   PetscFunctionBegin;
2154   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2155   ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2156   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2157   mat->offloadmask = PETSC_OFFLOAD_GPU;
2158   PetscFunctionReturn(0);
2159 }
2160 
2161 /*@C
2162    MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray()
2163 
2164    Not Collective
2165 
2166    Input Parameters:
2167 .  mat - the matrix
2168 
2169    Notes:
2170    You can only call this after a call to MatDenseCUDAPlaceArray()
2171 
2172    Level: developer
2173 
2174 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()
2175 
2176 @*/
2177 PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
2178 {
2179   PetscErrorCode ierr;
2180 
2181   PetscFunctionBegin;
2182   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2183   ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
2184   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2185   PetscFunctionReturn(0);
2186 }
2187 
2188 /*@C
2189    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
2190    array provided by the user. This is useful to avoid copying an array
2191    into a matrix
2192 
2193    Not Collective
2194 
2195    Input Parameters:
2196 +  mat - the matrix
2197 -  array - the array in column major order
2198 
2199    Notes:
2200    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2201    The memory passed in CANNOT be freed by the user. It will be freed
2202    when the matrix is destroyed. The array should respect the matrix leading dimension.
2203 
2204    Level: developer
2205 
2206 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
2207 @*/
2208 PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
2209 {
2210   PetscErrorCode ierr;
2211 
2212   PetscFunctionBegin;
2213   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2214   ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2215   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2216   mat->offloadmask = PETSC_OFFLOAD_GPU;
2217   PetscFunctionReturn(0);
2218 }
2219 
2220 /*@C
2221    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.
2222 
2223    Not Collective
2224 
2225    Input Parameters:
2226 .  A - the matrix
2227 
2228    Output Parameters
2229 .  array - the GPU array in column major order
2230 
2231    Notes:
2232    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
2233 
2234    Level: developer
2235 
2236 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
2237 @*/
2238 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
2239 {
2240   PetscErrorCode ierr;
2241 
2242   PetscFunctionBegin;
2243   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2244   ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2245   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2246   PetscFunctionReturn(0);
2247 }
2248 
2249 /*@C
2250    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().
2251 
2252    Not Collective
2253 
2254    Input Parameters:
2255 +  A - the matrix
2256 -  array - the GPU array in column major order
2257 
2258    Notes:
2259 
2260    Level: developer
2261 
2262 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2263 @*/
2264 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
2265 {
2266   PetscErrorCode ierr;
2267 
2268   PetscFunctionBegin;
2269   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2270   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2271   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2272   A->offloadmask = PETSC_OFFLOAD_GPU;
2273   PetscFunctionReturn(0);
2274 }
2275 
2276 /*@C
2277    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.
2278 
2279    Not Collective
2280 
2281    Input Parameters:
2282 .  A - the matrix
2283 
2284    Output Parameters
2285 .  array - the GPU array in column major order
2286 
2287    Notes:
2288    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2289 
2290    Level: developer
2291 
2292 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2293 @*/
2294 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
2295 {
2296   PetscErrorCode ierr;
2297 
2298   PetscFunctionBegin;
2299   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2300   ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2301   PetscFunctionReturn(0);
2302 }
2303 
2304 /*@C
2305    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2306 
2307    Not Collective
2308 
2309    Input Parameters:
2310 +  A - the matrix
2311 -  array - the GPU array in column major order
2312 
2313    Notes:
2314    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2315 
2316    Level: developer
2317 
2318 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
2319 @*/
2320 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2321 {
2322   PetscErrorCode ierr;
2323 
2324   PetscFunctionBegin;
2325   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2326   PetscFunctionReturn(0);
2327 }
2328 
2329 /*@C
2330    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.
2331 
2332    Not Collective
2333 
2334    Input Parameters:
2335 .  A - the matrix
2336 
2337    Output Parameters
2338 .  array - the GPU array in column major order
2339 
2340    Notes:
2341    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().
2342 
2343    Level: developer
2344 
2345 .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2346 @*/
2347 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
2348 {
2349   PetscErrorCode ierr;
2350 
2351   PetscFunctionBegin;
2352   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2353   ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2354   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2355   PetscFunctionReturn(0);
2356 }
2357 
2358 /*@C
2359    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().
2360 
2361    Not Collective
2362 
2363    Input Parameters:
2364 +  A - the matrix
2365 -  array - the GPU array in column major order
2366 
2367    Notes:
2368 
2369    Level: developer
2370 
2371 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2372 @*/
2373 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
2374 {
2375   PetscErrorCode ierr;
2376 
2377   PetscFunctionBegin;
2378   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2379   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2380   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2381   A->offloadmask = PETSC_OFFLOAD_GPU;
2382   PetscFunctionReturn(0);
2383 }
2384 #endif
2385 
2386 /*@C
2387    MatCreateDense - Creates a matrix in dense format.
2388 
2389    Collective
2390 
2391    Input Parameters:
2392 +  comm - MPI communicator
2393 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2394 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2395 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2396 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2397 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2398    to control all matrix memory allocation.
2399 
2400    Output Parameter:
2401 .  A - the matrix
2402 
2403    Notes:
2404    The dense format is fully compatible with standard Fortran 77
2405    storage by columns.
2406    Note that, although local portions of the matrix are stored in column-major
2407    order, the matrix is partitioned across MPI ranks by row.
2408 
2409    The data input variable is intended primarily for Fortran programmers
2410    who wish to allocate their own matrix memory space.  Most users should
2411    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
2412 
2413    The user MUST specify either the local or global matrix dimensions
2414    (possibly both).
2415 
2416    Level: intermediate
2417 
2418 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
2419 @*/
2420 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2421 {
2422   PetscErrorCode ierr;
2423   PetscMPIInt    size;
2424 
2425   PetscFunctionBegin;
2426   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2427   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2428   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
2429   if (size > 1) {
2430     PetscBool havedata = (PetscBool)!!data;
2431 
2432     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
2433     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
2434     ierr = MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr);
2435     if (havedata) {  /* user provided data array, so no need to assemble */
2436       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
2437       (*A)->assembled = PETSC_TRUE;
2438     }
2439   } else {
2440     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
2441     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
2442   }
2443   PetscFunctionReturn(0);
2444 }
2445 
2446 #if defined(PETSC_HAVE_CUDA)
2447 /*@C
2448    MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.
2449 
2450    Collective
2451 
2452    Input Parameters:
2453 +  comm - MPI communicator
2454 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2455 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2456 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2457 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2458 -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
2459    to control matrix memory allocation.
2460 
2461    Output Parameter:
2462 .  A - the matrix
2463 
2464    Notes:
2465 
2466    Level: intermediate
2467 
2468 .seealso: MatCreate(), MatCreateDense()
2469 @*/
2470 PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2471 {
2472   PetscErrorCode ierr;
2473   PetscMPIInt    size;
2474 
2475   PetscFunctionBegin;
2476   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2477   PetscValidLogicalCollectiveBool(*A,!!data,6);
2478   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2479   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
2480   if (size > 1) {
2481     ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
2482     ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
2483     if (data) {  /* user provided data array, so no need to assemble */
2484       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
2485       (*A)->assembled = PETSC_TRUE;
2486     }
2487   } else {
2488     ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
2489     ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
2490   }
2491   PetscFunctionReturn(0);
2492 }
2493 #endif
2494 
2495 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
2496 {
2497   Mat            mat;
2498   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
2499   PetscErrorCode ierr;
2500 
2501   PetscFunctionBegin;
2502   *newmat = NULL;
2503   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
2504   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2505   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
2506   a       = (Mat_MPIDense*)mat->data;
2507 
2508   mat->factortype   = A->factortype;
2509   mat->assembled    = PETSC_TRUE;
2510   mat->preallocated = PETSC_TRUE;
2511 
2512   mat->insertmode = NOT_SET_VALUES;
2513   a->donotstash   = oldmat->donotstash;
2514 
2515   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
2516   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
2517 
2518   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2519   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2520   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
2521 
2522   *newmat = mat;
2523   PetscFunctionReturn(0);
2524 }
2525 
2526 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
2527 {
2528   PetscErrorCode ierr;
2529   PetscBool      isbinary;
2530 #if defined(PETSC_HAVE_HDF5)
2531   PetscBool      ishdf5;
2532 #endif
2533 
2534   PetscFunctionBegin;
2535   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2536   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2537   /* force binary viewer to load .info file if it has not yet done so */
2538   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2539   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2540 #if defined(PETSC_HAVE_HDF5)
2541   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2542 #endif
2543   if (isbinary) {
2544     ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
2545 #if defined(PETSC_HAVE_HDF5)
2546   } else if (ishdf5) {
2547     ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
2548 #endif
2549   } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2550   PetscFunctionReturn(0);
2551 }
2552 
2553 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
2554 {
2555   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2556   Mat            a,b;
2557   PetscBool      flg;
2558   PetscErrorCode ierr;
2559 
2560   PetscFunctionBegin;
2561   a    = matA->A;
2562   b    = matB->A;
2563   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2564   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2565   PetscFunctionReturn(0);
2566 }
2567 
2568 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2569 {
2570   PetscErrorCode        ierr;
2571   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2572 
2573   PetscFunctionBegin;
2574   ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
2575   ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
2576   ierr = PetscFree(atb);CHKERRQ(ierr);
2577   PetscFunctionReturn(0);
2578 }
2579 
2580 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2581 {
2582   PetscErrorCode        ierr;
2583   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2584 
2585   PetscFunctionBegin;
2586   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
2587   ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
2588   ierr = PetscFree(abt);CHKERRQ(ierr);
2589   PetscFunctionReturn(0);
2590 }
2591 
2592 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2593 {
2594   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2595   Mat_TransMatMultDense *atb;
2596   PetscErrorCode        ierr;
2597   MPI_Comm              comm;
2598   PetscMPIInt           size,*recvcounts;
2599   PetscScalar           *carray,*sendbuf;
2600   const PetscScalar     *atbarray;
2601   PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
2602   const PetscInt        *ranges;
2603 
2604   PetscFunctionBegin;
2605   MatCheckProduct(C,3);
2606   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2607   atb = (Mat_TransMatMultDense *)C->product->data;
2608   recvcounts = atb->recvcounts;
2609   sendbuf = atb->sendbuf;
2610 
2611   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2612   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
2613 
2614   /* compute atbarray = aseq^T * bseq */
2615   ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);
2616 
2617   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
2618 
2619   /* arrange atbarray into sendbuf */
2620   ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
2621   for (proc=0, k=0; proc<size; proc++) {
2622     for (j=0; j<cN; j++) {
2623       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
2624     }
2625   }
2626   ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
2627 
2628   /* sum all atbarray to local values of C */
2629   ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr);
2630   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRMPI(ierr);
2631   ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr);
2632   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2633   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2634   PetscFunctionReturn(0);
2635 }
2636 
2637 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2638 {
2639   PetscErrorCode        ierr;
2640   MPI_Comm              comm;
2641   PetscMPIInt           size;
2642   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
2643   Mat_TransMatMultDense *atb;
2644   PetscBool             cisdense;
2645   PetscInt              i;
2646   const PetscInt        *ranges;
2647 
2648   PetscFunctionBegin;
2649   MatCheckProduct(C,3);
2650   if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2651   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2652   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2653     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2654   }
2655 
2656   /* create matrix product C */
2657   ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr);
2658   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
2659   if (!cisdense) {
2660     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
2661   }
2662   ierr = MatSetUp(C);CHKERRQ(ierr);
2663 
2664   /* create data structure for reuse C */
2665   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
2666   ierr = PetscNew(&atb);CHKERRQ(ierr);
2667   cM   = C->rmap->N;
2668   ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);
2669   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
2670   for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
2671 
2672   C->product->data    = atb;
2673   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2674   PetscFunctionReturn(0);
2675 }
2676 
2677 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2678 {
2679   PetscErrorCode        ierr;
2680   MPI_Comm              comm;
2681   PetscMPIInt           i, size;
2682   PetscInt              maxRows, bufsiz;
2683   PetscMPIInt           tag;
2684   PetscInt              alg;
2685   Mat_MatTransMultDense *abt;
2686   Mat_Product           *product = C->product;
2687   PetscBool             flg;
2688 
2689   PetscFunctionBegin;
2690   MatCheckProduct(C,4);
2691   if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2692   /* check local size of A and B */
2693   if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);
2694 
2695   ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
2696   alg  = flg ? 0 : 1;
2697 
2698   /* setup matrix product C */
2699   ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
2700   ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
2701   ierr = MatSetUp(C);CHKERRQ(ierr);
2702   ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr);
2703 
2704   /* create data structure for reuse C */
2705   ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
2706   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
2707   ierr = PetscNew(&abt);CHKERRQ(ierr);
2708   abt->tag = tag;
2709   abt->alg = alg;
2710   switch (alg) {
2711   case 1: /* alg: "cyclic" */
2712     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2713     bufsiz = A->cmap->N * maxRows;
2714     ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
2715     break;
2716   default: /* alg: "allgatherv" */
2717     ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
2718     ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
2719     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2720     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2721     break;
2722   }
2723 
2724   C->product->data    = abt;
2725   C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2726   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
2727   PetscFunctionReturn(0);
2728 }
2729 
2730 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2731 {
2732   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2733   Mat_MatTransMultDense *abt;
2734   PetscErrorCode        ierr;
2735   MPI_Comm              comm;
2736   PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2737   PetscScalar           *sendbuf, *recvbuf=NULL, *cv;
2738   PetscInt              i,cK=A->cmap->N,k,j,bn;
2739   PetscScalar           _DOne=1.0,_DZero=0.0;
2740   const PetscScalar     *av,*bv;
2741   PetscBLASInt          cm, cn, ck, alda, blda = 0, clda;
2742   MPI_Request           reqs[2];
2743   const PetscInt        *ranges;
2744 
2745   PetscFunctionBegin;
2746   MatCheckProduct(C,3);
2747   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2748   abt  = (Mat_MatTransMultDense*)C->product->data;
2749   ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
2750   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
2751   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
2752   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
2753   ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
2754   ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
2755   ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
2756   ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
2757   ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
2758   ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
2759   ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
2760   ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
2761   ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
2762   bn   = B->rmap->n;
2763   if (blda == bn) {
2764     sendbuf = (PetscScalar*)bv;
2765   } else {
2766     sendbuf = abt->buf[0];
2767     for (k = 0, i = 0; i < cK; i++) {
2768       for (j = 0; j < bn; j++, k++) {
2769         sendbuf[k] = bv[i * blda + j];
2770       }
2771     }
2772   }
2773   if (size > 1) {
2774     sendto = (rank + size - 1) % size;
2775     recvfrom = (rank + size + 1) % size;
2776   } else {
2777     sendto = recvfrom = 0;
2778   }
2779   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2780   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2781   recvisfrom = rank;
2782   for (i = 0; i < size; i++) {
2783     /* we have finished receiving in sending, bufs can be read/modified */
2784     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
2785     PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2786 
2787     if (nextrecvisfrom != rank) {
2788       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
2789       sendsiz = cK * bn;
2790       recvsiz = cK * nextbn;
2791       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2792       ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRMPI(ierr);
2793       ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRMPI(ierr);
2794     }
2795 
2796     /* local aseq * sendbuf^T */
2797     ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
2798     if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));
2799 
2800     if (nextrecvisfrom != rank) {
2801       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2802       ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
2803     }
2804     bn = nextbn;
2805     recvisfrom = nextrecvisfrom;
2806     sendbuf = recvbuf;
2807   }
2808   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
2809   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2810   ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
2811   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2812   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2813   PetscFunctionReturn(0);
2814 }
2815 
2816 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2817 {
2818   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2819   Mat_MatTransMultDense *abt;
2820   PetscErrorCode        ierr;
2821   MPI_Comm              comm;
2822   PetscMPIInt           size;
2823   PetscScalar           *cv, *sendbuf, *recvbuf;
2824   const PetscScalar     *av,*bv;
2825   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2826   PetscScalar           _DOne=1.0,_DZero=0.0;
2827   PetscBLASInt          cm, cn, ck, alda, clda;
2828 
2829   PetscFunctionBegin;
2830   MatCheckProduct(C,3);
2831   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2832   abt  = (Mat_MatTransMultDense*)C->product->data;
2833   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2834   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
2835   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
2836   ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
2837   ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
2838   ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
2839   ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
2840   ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr);
2841   ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
2842   ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
2843   /* copy transpose of B into buf[0] */
2844   bn      = B->rmap->n;
2845   sendbuf = abt->buf[0];
2846   recvbuf = abt->buf[1];
2847   for (k = 0, j = 0; j < bn; j++) {
2848     for (i = 0; i < cK; i++, k++) {
2849       sendbuf[k] = bv[i * blda + j];
2850     }
2851   }
2852   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2853   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRMPI(ierr);
2854   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2855   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2856   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
2857   if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2858   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
2859   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2860   ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
2861   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2862   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2863   PetscFunctionReturn(0);
2864 }
2865 
2866 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2867 {
2868   Mat_MatTransMultDense *abt;
2869   PetscErrorCode        ierr;
2870 
2871   PetscFunctionBegin;
2872   MatCheckProduct(C,3);
2873   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2874   abt = (Mat_MatTransMultDense*)C->product->data;
2875   switch (abt->alg) {
2876   case 1:
2877     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2878     break;
2879   default:
2880     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2881     break;
2882   }
2883   PetscFunctionReturn(0);
2884 }
2885 
2886 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2887 {
2888   PetscErrorCode   ierr;
2889   Mat_MatMultDense *ab = (Mat_MatMultDense*)data;
2890 
2891   PetscFunctionBegin;
2892   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2893   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2894   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2895   ierr = PetscFree(ab);CHKERRQ(ierr);
2896   PetscFunctionReturn(0);
2897 }
2898 
2899 #if defined(PETSC_HAVE_ELEMENTAL)
2900 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2901 {
2902   PetscErrorCode   ierr;
2903   Mat_MatMultDense *ab;
2904 
2905   PetscFunctionBegin;
2906   MatCheckProduct(C,3);
2907   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
2908   ab   = (Mat_MatMultDense*)C->product->data;
2909   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
2910   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
2911   ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
2912   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
2913   PetscFunctionReturn(0);
2914 }
2915 
2916 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2917 {
2918   PetscErrorCode   ierr;
2919   Mat              Ae,Be,Ce;
2920   Mat_MatMultDense *ab;
2921 
2922   PetscFunctionBegin;
2923   MatCheckProduct(C,4);
2924   if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2925   /* check local size of A and B */
2926   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2927     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2928   }
2929 
2930   /* create elemental matrices Ae and Be */
2931   ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr);
2932   ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2933   ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr);
2934   ierr = MatSetUp(Ae);CHKERRQ(ierr);
2935   ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2936 
2937   ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr);
2938   ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
2939   ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
2940   ierr = MatSetUp(Be);CHKERRQ(ierr);
2941   ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2942 
2943   /* compute symbolic Ce = Ae*Be */
2944   ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr);
2945   ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr);
2946 
2947   /* setup C */
2948   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2949   ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr);
2950   ierr = MatSetUp(C);CHKERRQ(ierr);
2951 
2952   /* create data structure for reuse Cdense */
2953   ierr = PetscNew(&ab);CHKERRQ(ierr);
2954   ab->Ae = Ae;
2955   ab->Be = Be;
2956   ab->Ce = Ce;
2957 
2958   C->product->data    = ab;
2959   C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
2960   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2961   PetscFunctionReturn(0);
2962 }
2963 #endif
2964 /* ----------------------------------------------- */
2965 #if defined(PETSC_HAVE_ELEMENTAL)
2966 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2967 {
2968   PetscFunctionBegin;
2969   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2970   C->ops->productsymbolic = MatProductSymbolic_AB;
2971   PetscFunctionReturn(0);
2972 }
2973 #endif
2974 
2975 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2976 {
2977   Mat_Product *product = C->product;
2978   Mat         A = product->A,B=product->B;
2979 
2980   PetscFunctionBegin;
2981   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2982     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2983   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2984   C->ops->productsymbolic = MatProductSymbolic_AtB;
2985   PetscFunctionReturn(0);
2986 }
2987 
2988 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2989 {
2990   PetscErrorCode ierr;
2991   Mat_Product    *product = C->product;
2992   const char     *algTypes[2] = {"allgatherv","cyclic"};
2993   PetscInt       alg,nalg = 2;
2994   PetscBool      flg = PETSC_FALSE;
2995 
2996   PetscFunctionBegin;
2997   /* Set default algorithm */
2998   alg = 0; /* default is allgatherv */
2999   ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
3000   if (flg) {
3001     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
3002   }
3003 
3004   /* Get runtime option */
3005   if (product->api_user) {
3006     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
3007     ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
3008     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3009   } else {
3010     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
3011     ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
3012     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3013   }
3014   if (flg) {
3015     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
3016   }
3017 
3018   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
3019   C->ops->productsymbolic          = MatProductSymbolic_ABt;
3020   PetscFunctionReturn(0);
3021 }
3022 
3023 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
3024 {
3025   PetscErrorCode ierr;
3026   Mat_Product    *product = C->product;
3027 
3028   PetscFunctionBegin;
3029   switch (product->type) {
3030 #if defined(PETSC_HAVE_ELEMENTAL)
3031   case MATPRODUCT_AB:
3032     ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
3033     break;
3034 #endif
3035   case MATPRODUCT_AtB:
3036     ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
3037     break;
3038   case MATPRODUCT_ABt:
3039     ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
3040     break;
3041   default:
3042     break;
3043   }
3044   PetscFunctionReturn(0);
3045 }
3046