xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision efa12513287cff49a2b9648ae83199dcbfaad71a)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11 
12       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
13               matrix that represents the operator. For sequential matrices it returns itself.
14 
15     Input Parameter:
16 .      A - the Seq or MPI dense matrix
17 
18     Output Parameter:
19 .      B - the inner matrix
20 
21     Level: intermediate
22 
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25 {
26   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
27   PetscErrorCode ierr;
28   PetscBool      flg;
29 
30   PetscFunctionBegin;
31   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
32   PetscValidPointer(B,2);
33   ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
34   if (flg) *B = mat->A;
35   else {
36     ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg);CHKERRQ(ierr);
37     if (!flg) SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
38     *B = A;
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
44 {
45   Mat_MPIDense   *Amat = (Mat_MPIDense*)A->data;
46   Mat_MPIDense   *Bmat = (Mat_MPIDense*)B->data;
47   PetscErrorCode ierr;
48 
49   PetscFunctionBegin;
50   ierr = MatCopy(Amat->A,Bmat->A,s);CHKERRQ(ierr);
51   PetscFunctionReturn(0);
52 }
53 
54 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
55 {
56   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
57   PetscErrorCode ierr;
58   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
59 
60   PetscFunctionBegin;
61   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
62   lrow = row - rstart;
63   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
64   PetscFunctionReturn(0);
65 }
66 
67 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
68 {
69   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
70   PetscErrorCode ierr;
71   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
72 
73   PetscFunctionBegin;
74   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
75   lrow = row - rstart;
76   ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
77   PetscFunctionReturn(0);
78 }
79 
/* Return (and cache on A under the name "DiagonalBlock") a sequential dense
   matrix whose storage aliases the locally owned diagonal block of A.
   Requires congruent row/column layouts. */
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  /* reuse a block previously cached on A, if any */
  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr);
    if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    /* B aliases A's storage starting at the local diagonal block.
       NOTE(review): the offset m*rstart assumes the local leading dimension
       equals m -- confirm this holds when a user-provided lda is in effect. */
    ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* the composition keeps a reference, so destroying our handle here
       leaves B alive and cached on A for later calls */
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
113 
/* Insert/add values: locally owned rows go straight into the sequential
   block; off-process rows are queued in the stash and communicated during
   assembly (unless stashing was disabled). */
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are ignored by convention */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v is stored by rows: input row i occupies v[i*n .. i*n+n-1] */
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* v is stored by columns: input entry (i,j) is v[i + j*m] */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue; /* negative column indices are ignored */
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: stash it and mark the matrix as needing assembly */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
147 
148 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
149 {
150   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
151   PetscErrorCode ierr;
152   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
153 
154   PetscFunctionBegin;
155   for (i=0; i<m; i++) {
156     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
157     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
158     if (idxm[i] >= rstart && idxm[i] < rend) {
159       row = idxm[i] - rstart;
160       for (j=0; j<n; j++) {
161         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
162         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
163         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
164       }
165     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
166   }
167   PetscFunctionReturn(0);
168 }
169 
170 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
171 {
172   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
173   PetscErrorCode ierr;
174 
175   PetscFunctionBegin;
176   ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
177   PetscFunctionReturn(0);
178 }
179 
/* Set the leading dimension of the local dense storage; if the inner
   sequential matrix does not exist yet, create and type it first so the
   lda can be recorded before any preallocation happens. */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->A) {
    if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
    ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
    /* the local block is A->rmap->n x A->cmap->N: each rank stores every
       column of its slice of rows */
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->A);CHKERRQ(ierr);
    ierr = MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
    /* match the sequential type (CUDA or host) to the parallel type */
    ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  }
  ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
200 
201 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
202 {
203   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
204   PetscErrorCode ierr;
205 
206   PetscFunctionBegin;
207   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
208   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
209   PetscFunctionReturn(0);
210 }
211 
212 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
213 {
214   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
215   PetscErrorCode ierr;
216 
217   PetscFunctionBegin;
218   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
219   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
220   PetscFunctionReturn(0);
221 }
222 
223 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
224 {
225   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
226   PetscErrorCode ierr;
227 
228   PetscFunctionBegin;
229   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
230   ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr);
231   PetscFunctionReturn(0);
232 }
233 
234 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
235 {
236   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
237   PetscErrorCode ierr;
238 
239   PetscFunctionBegin;
240   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
241   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
242   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
243   PetscFunctionReturn(0);
244 }
245 
246 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
247 {
248   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
249   PetscErrorCode ierr;
250 
251   PetscFunctionBegin;
252   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
253   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
254   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
255   PetscFunctionReturn(0);
256 }
257 
258 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
259 {
260   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
261   PetscErrorCode ierr;
262 
263   PetscFunctionBegin;
264   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
265   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
266   ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr);
267   PetscFunctionReturn(0);
268 }
269 
/* Extract the parallel submatrix A(isrow,iscol) into a new (or reused)
   MATMPIDENSE matrix.  No redistribution is done: each rank may only
   request rows it owns; the column IS is gathered so every rank copies
   every requested column of its local rows. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
  PetscErrorCode    ierr;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  /* NOTE(review): raw MPI_Comm handle comparison -- congruent but distinct
     communicators would be rejected; confirm this is the intended check */
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  /* every rank needs the full list of requested columns */
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr);
  /* NOTE(review): writing through bv++ assumes the destination is stored
     contiguously (its lda equals nrows) -- confirm for MAT_REUSE_MATRIX
     with a user-set leading dimension */
  for (i=0; i<Ncols; i++) {
    const PetscScalar *av = v + lda*icol[i]; /* start of source column icol[i] */
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }
  ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr);

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
339 
340 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
341 {
342   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
343   PetscErrorCode ierr;
344 
345   PetscFunctionBegin;
346   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
347   PetscFunctionReturn(0);
348 }
349 
350 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
351 {
352   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
353   PetscErrorCode ierr;
354 
355   PetscFunctionBegin;
356   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
357   PetscFunctionReturn(0);
358 }
359 
360 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
361 {
362   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
363   PetscErrorCode ierr;
364 
365   PetscFunctionBegin;
366   ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr);
367   PetscFunctionReturn(0);
368 }
369 
370 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
371 {
372   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
373   PetscErrorCode ierr;
374   PetscInt       nstash,reallocs;
375 
376   PetscFunctionBegin;
377   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
378 
379   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
380   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
381   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
382   PetscFunctionReturn(0);
383 }
384 
/* Drain the stash of off-process values received during assembly, insert
   them into the local block, then assemble the inner sequential matrix.
   On the first final assembly the MatMult communication is also set up. */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break; /* no more incoming messages */

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* first final assembly: build the scatter used by MatMult and friends */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
423 
424 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
425 {
426   PetscErrorCode ierr;
427   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
428 
429   PetscFunctionBegin;
430   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
431   PetscFunctionReturn(0);
432 }
433 
/* Zero the locally owned subset of the listed global rows; optionally
   adjust the right-hand side b so that b[r] = diag*x[r] on those rows,
   then place diag on the diagonal. */
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr);
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr);
  }
  /* zero the rows with a zero diagonal; the diagonal is handled below */
  ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (diag != 0.0) {
    Vec d;

    /* NOTE(review): this writes diag onto the ENTIRE diagonal, not only the
       zeroed rows -- confirm this matches the intended MatZeroRows contract */
    ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
    ierr = VecSet(d,diag);CHKERRQ(ierr);
    ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecDestroy(&d);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
466 
467 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
468 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
469 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
470 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
471 
/* y = A*x: gather the needed entries of x into the local work vector lvec
   via the PetscSF broadcast, then apply the sequential local matrix. */
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  /* memtype-aware access lets the SF move data directly between host and
     device buffers */
  ierr = VecGetArrayReadAndMemType(xx,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
490 
/* z = y + A*x: gather x into lvec with the SF broadcast, then use the
   sequential multadd on the local block. */
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  /* memtype-aware access lets the SF move data directly between host and
     device buffers */
  ierr = VecGetArrayReadAndMemType(xx,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
509 
/* y = A^T*x: apply the local transpose product into lvec, then sum the
   per-rank partial results into y (zeroed first) with an SF reduce. */
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  ierr = VecSet(yy,0.0);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  /* memtype-aware access lets the SF move data directly between host and
     device buffers */
  ierr = VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(yy,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(yy,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
529 
/* z = y + A^T*x: start from a copy of y in z, then add the per-rank
   partial transpose products into z with an SF reduce (MPIU_SUM). */
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  /* memtype-aware access lets the SF move data directly between host and
     device buffers */
  ierr = VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(zz,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(zz,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
549 
550 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
551 {
552   Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
553   PetscErrorCode    ierr;
554   PetscInt          lda,len,i,n,m = A->rmap->n,radd;
555   PetscScalar       *x,zero = 0.0;
556   const PetscScalar *av;
557 
558   PetscFunctionBegin;
559   ierr = VecSet(v,zero);CHKERRQ(ierr);
560   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
561   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
562   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
563   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
564   radd = A->rmap->rstart*m;
565   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
566   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
567   for (i=0; i<len; i++) {
568     x[i] = av[radd + i*lda + i];
569   }
570   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
571   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
572   PetscFunctionReturn(0);
573 }
574 
/* Tear down a MATMPIDENSE matrix: destroy the inner sequential matrix,
   communication objects, and cached column vector/submatrix, free the
   data structure, and clear every composed method so the object can be
   retyped safely. */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  /* refuse to destroy while a column vector or submatrix is checked out */
  if (mdn->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (mdn->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);

  /* clear all composed methods registered at creation time */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
#if defined (PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
632 
633 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
634 
635 #include <petscdraw.h>
636 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
637 {
638   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
639   PetscErrorCode    ierr;
640   PetscMPIInt       rank;
641   PetscViewerType   vtype;
642   PetscBool         iascii,isdraw;
643   PetscViewer       sviewer;
644   PetscViewerFormat format;
645 
646   PetscFunctionBegin;
647   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
648   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
649   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
650   if (iascii) {
651     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
652     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
653     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
654       MatInfo info;
655       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
656       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
657       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
658       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
659       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
660       ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
661       PetscFunctionReturn(0);
662     } else if (format == PETSC_VIEWER_ASCII_INFO) {
663       PetscFunctionReturn(0);
664     }
665   } else if (isdraw) {
666     PetscDraw draw;
667     PetscBool isnull;
668 
669     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
670     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
671     if (isnull) PetscFunctionReturn(0);
672   }
673 
674   {
675     /* assemble the entire matrix onto first processor. */
676     Mat         A;
677     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
678     PetscInt    *cols;
679     PetscScalar *vals;
680 
681     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
682     if (!rank) {
683       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
684     } else {
685       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
686     }
687     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
688     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
689     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
690     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
691 
692     /* Copy the matrix ... This isn't the most efficient means,
693        but it's quick for now */
694     A->insertmode = INSERT_VALUES;
695 
696     row = mat->rmap->rstart;
697     m   = mdn->A->rmap->n;
698     for (i=0; i<m; i++) {
699       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
700       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
701       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
702       row++;
703     }
704 
705     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
706     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
707     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
708     if (!rank) {
709       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
710       ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
711     }
712     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
713     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
714     ierr = MatDestroy(&A);CHKERRQ(ierr);
715   }
716   PetscFunctionReturn(0);
717 }
718 
719 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
720 {
721   PetscErrorCode ierr;
722   PetscBool      iascii,isbinary,isdraw,issocket;
723 
724   PetscFunctionBegin;
725   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
726   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
727   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
728   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
729 
730   if (iascii || issocket || isdraw) {
731     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
732   } else if (isbinary) {
733     ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
734   }
735   PetscFunctionReturn(0);
736 }
737 
738 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
739 {
740   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
741   Mat            mdn  = mat->A;
742   PetscErrorCode ierr;
743   PetscLogDouble isend[5],irecv[5];
744 
745   PetscFunctionBegin;
746   info->block_size = 1.0;
747 
748   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
749 
750   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
751   isend[3] = info->memory;  isend[4] = info->mallocs;
752   if (flag == MAT_LOCAL) {
753     info->nz_used      = isend[0];
754     info->nz_allocated = isend[1];
755     info->nz_unneeded  = isend[2];
756     info->memory       = isend[3];
757     info->mallocs      = isend[4];
758   } else if (flag == MAT_GLOBAL_MAX) {
759     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
760 
761     info->nz_used      = irecv[0];
762     info->nz_allocated = irecv[1];
763     info->nz_unneeded  = irecv[2];
764     info->memory       = irecv[3];
765     info->mallocs      = irecv[4];
766   } else if (flag == MAT_GLOBAL_SUM) {
767     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
768 
769     info->nz_used      = irecv[0];
770     info->nz_allocated = irecv[1];
771     info->nz_unneeded  = irecv[2];
772     info->memory       = irecv[3];
773     info->mallocs      = irecv[4];
774   }
775   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
776   info->fill_ratio_needed = 0;
777   info->factor_mallocs    = 0;
778   PetscFunctionReturn(0);
779 }
780 
/*
   MatSetOption_MPIDense - Sets an option on a parallel dense matrix.

   Options concerning nonzero structure are forwarded to the local
   sequential matrix; options that are meaningless for a dense storage
   format are logged and ignored; unknown options raise an error.
*/
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    /* nonzero-location options are handled entirely by the local block */
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    /* record the orientation here too; MatSetValues_MPIDense consults it */
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    /* these relate to sparse assembly and have no effect on dense storage */
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* when set, off-process MatSetValues entries are dropped instead of stashed */
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    /* symmetry hints are not exploited by this implementation */
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}
821 
822 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
823 {
824   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
825   const PetscScalar *l;
826   PetscScalar       x,*v,*vv,*r;
827   PetscErrorCode    ierr;
828   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;
829 
830   PetscFunctionBegin;
831   ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
832   ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
833   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
834   if (ll) {
835     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
836     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2);
837     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
838     for (i=0; i<m; i++) {
839       x = l[i];
840       v = vv + i;
841       for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
842     }
843     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
844     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
845   }
846   if (rr) {
847     const PetscScalar *ar;
848 
849     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
850     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
851     ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
852     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
853     ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
854     ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
855     ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr);
856     for (i=0; i<n; i++) {
857       x = r[i];
858       v = vv + i*lda;
859       for (j=0; j<m; j++) (*v++) *= x;
860     }
861     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
862     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
863   }
864   ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr);
865   PetscFunctionReturn(0);
866 }
867 
/*
   MatNorm_MPIDense - Computes a matrix norm of a parallel dense matrix.

   Supports NORM_FROBENIUS, NORM_1 (max column sum) and NORM_INFINITY
   (max row sum); NORM_2 is not available.  On a single process the call
   is forwarded directly to the sequential inner matrix.
*/
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,j;
  PetscMPIInt       size;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr);
  v    = av;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  if (size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over the local block, reduce, then take the square root.
         NOTE(review): this walks n*m contiguous entries and so assumes the
         local block has lda == m — confirm against the preallocation path. */
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      /* per-column absolute sums: every rank owns full columns of its rows,
         so sum locally per column, all-reduce the N column sums, then max */
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = av;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not split across processes, so the local infinity norm
         only needs a max-reduction */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
916 
/*
   MatTranspose_MPIDense - Forms the transpose of a parallel dense matrix.

   For MAT_INITIAL_MATRIX (and MAT_INPLACE_MATRIX) a fresh matrix with
   swapped layouts is created; for MAT_REUSE_MATRIX the caller-supplied
   *matout is filled.  Entries are moved with MatSetValues: local row
   rstart+i of A becomes column rstart+i of B, assembled with the usual
   begin/end pair to handle off-process destinations.  For in-place
   transposition the new matrix replaces A via MatHeaderMerge.
*/
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i,lda;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* build B with row/column layouts swapped relative to A */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else B = *matout;

  m    = a->A->rmap->n; n = a->A->cmap->n;
  ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  /* rwork holds the global row indices of this process's rows of A,
     which become the column indices in B */
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    /* column j of A (contiguous, length m) is row j of B */
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += lda;
  }
  ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: replace A's contents with B, destroying B's header */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
954 
955 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
956 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
957 
958 PetscErrorCode MatSetUp_MPIDense(Mat A)
959 {
960   PetscErrorCode ierr;
961 
962   PetscFunctionBegin;
963   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
964   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
965   if (!A->preallocated) {
966     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
967   }
968   PetscFunctionReturn(0);
969 }
970 
971 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
972 {
973   PetscErrorCode ierr;
974   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
975 
976   PetscFunctionBegin;
977   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
978   PetscFunctionReturn(0);
979 }
980 
981 PetscErrorCode MatConjugate_MPIDense(Mat mat)
982 {
983   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
984   PetscErrorCode ierr;
985 
986   PetscFunctionBegin;
987   ierr = MatConjugate(a->A);CHKERRQ(ierr);
988   PetscFunctionReturn(0);
989 }
990 
991 PetscErrorCode MatRealPart_MPIDense(Mat A)
992 {
993   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
994   PetscErrorCode ierr;
995 
996   PetscFunctionBegin;
997   ierr = MatRealPart(a->A);CHKERRQ(ierr);
998   PetscFunctionReturn(0);
999 }
1000 
1001 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1002 {
1003   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1004   PetscErrorCode ierr;
1005 
1006   PetscFunctionBegin;
1007   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1008   PetscFunctionReturn(0);
1009 }
1010 
1011 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
1012 {
1013   PetscErrorCode ierr;
1014   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1015 
1016   PetscFunctionBegin;
1017   if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
1018   if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
1019   ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
1020   PetscFunctionReturn(0);
1021 }
1022 
1023 PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1024 
1025 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1026 {
1027   PetscErrorCode ierr;
1028   PetscInt       i,n;
1029   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1030   PetscReal      *work;
1031 
1032   PetscFunctionBegin;
1033   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1034   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1035   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1036   if (type == NORM_2) {
1037     for (i=0; i<n; i++) work[i] *= work[i];
1038   }
1039   if (type == NORM_INFINITY) {
1040     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1041   } else {
1042     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1043   }
1044   ierr = PetscFree(work);CHKERRQ(ierr);
1045   if (type == NORM_2) {
1046     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1047   }
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 #if defined(PETSC_HAVE_CUDA)
/*
   MatDenseGetColumnVec_MPIDenseCUDA - Returns a CUDA vector aliasing
   column col of the matrix (read/write access).

   The lazily created vector a->cvec has no storage of its own; its array
   is "placed" at the start of the requested column in the device buffer.
   The outstanding access is recorded in a->vecinuse so conflicting calls
   error out until the matching restore.
*/
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* create the reusable wrapper vector once, with no backing array yet */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* stored with +1 so column 0 still reads as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1072 
/*
   MatDenseRestoreColumnVec_MPIDenseCUDA - Releases the column vector
   obtained with MatDenseGetColumnVec: clears the in-use flag, returns
   the device array to the local matrix, and detaches the placed array
   from the wrapper vector.
*/
static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;  /* the caller's handle is invalidated */
  PetscFunctionReturn(0);
}
1087 
/*
   MatDenseGetColumnVecRead_MPIDenseCUDA - Read-only variant of
   MatDenseGetColumnVec: aliases column col in the device buffer and
   additionally pushes a read lock on the wrapper vector so writes
   through it are caught.
*/
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the reusable wrapper vector with no backing array */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* +1 so column 0 still registers as in use */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1109 
/*
   MatDenseRestoreColumnVecRead_MPIDenseCUDA - Releases a read-only
   column vector: returns the device array, pops the read lock that the
   matching Get pushed, then detaches the placed array.
*/
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);   /* must precede resetting the array */
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;  /* invalidate the caller's handle */
  PetscFunctionReturn(0);
}
1125 
/*
   MatDenseGetColumnVecWrite_MPIDenseCUDA - Write-only variant of
   MatDenseGetColumnVec: the device array is fetched with write intent
   (previous contents need not be valid) and column col is aliased into
   the shared wrapper vector.
*/
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the reusable wrapper vector with no backing array */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* +1 so column 0 still registers as in use */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1146 
/*
   MatDenseRestoreColumnVecWrite_MPIDenseCUDA - Releases a write-only
   column vector: clears the in-use flag, returns the device array with
   write semantics, and detaches the placed array from the wrapper.
*/
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;  /* invalidate the caller's handle */
  PetscFunctionReturn(0);
}
1161 
1162 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1163 {
1164   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1165   PetscErrorCode ierr;
1166 
1167   PetscFunctionBegin;
1168   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1169   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1170   ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
1171   PetscFunctionReturn(0);
1172 }
1173 
1174 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1175 {
1176   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1177   PetscErrorCode ierr;
1178 
1179   PetscFunctionBegin;
1180   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1181   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1182   ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
1183   PetscFunctionReturn(0);
1184 }
1185 
1186 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1187 {
1188   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1189   PetscErrorCode ierr;
1190 
1191   PetscFunctionBegin;
1192   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1193   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1194   ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr);
1195   PetscFunctionReturn(0);
1196 }
1197 
1198 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1199 {
1200   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1201   PetscErrorCode ierr;
1202 
1203   PetscFunctionBegin;
1204   ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
1205   PetscFunctionReturn(0);
1206 }
1207 
1208 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1209 {
1210   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1211   PetscErrorCode ierr;
1212 
1213   PetscFunctionBegin;
1214   ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1219 {
1220   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1221   PetscErrorCode ierr;
1222 
1223   PetscFunctionBegin;
1224   ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1229 {
1230   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1231   PetscErrorCode ierr;
1232 
1233   PetscFunctionBegin;
1234   ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1239 {
1240   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1241   PetscErrorCode ierr;
1242 
1243   PetscFunctionBegin;
1244   ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
1245   PetscFunctionReturn(0);
1246 }
1247 
1248 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1249 {
1250   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1251   PetscErrorCode ierr;
1252 
1253   PetscFunctionBegin;
1254   ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1259 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1260 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
1261 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1262 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1263 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
1264 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);
1265 
/*
   MatBindToCPU_MPIDenseCUDA - Pins the matrix to the CPU (bind=TRUE) or
   re-enables GPU execution (bind=FALSE).

   Binding is forwarded to the inner sequential matrix and the cached
   column submatrix, and the MatDenseGet/RestoreColumnVec* method table
   is swapped between the CUDA and host implementations.  When moving
   back to the GPU, cached helper objects of the wrong (host) type are
   destroyed so they get recreated with CUDA types on next use.
*/
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (d->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (d->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* drop cached column vector / submatrix unless they are already CUDA typed */
    ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
    }
    ierr = PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = MatDestroy(&d->cmat);CHKERRQ(ierr);
    }
    /* install the CUDA column-vector accessors */
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
  } else {
    /* install the host column-vector accessors */
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  }
  if (d->cmat) {
    ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1308 
/*
   MatMPIDenseCUDASetPreallocation - Allocates (or adopts) the CUDA device
   storage of a MATMPIDENSECUDA matrix.

   d_data may be NULL, in which case the storage is allocated internally;
   otherwise the provided device buffer is used.  A no-op for matrices
   that are not of type MATMPIDENSECUDA.  Creates the inner sequential
   matrix on first call and (re)types it as MATSEQDENSECUDA.
*/
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  if (!iscuda) PetscFunctionReturn(0);
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!d->A) {
    /* the inner matrix is sequential: each rank stores its rows times all N columns */
    ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
    ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
  }
  ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
  ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1331 #endif
1332 
1333 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1334 {
1335   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1336   PetscErrorCode ierr;
1337 
1338   PetscFunctionBegin;
1339   ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
1340   PetscFunctionReturn(0);
1341 }
1342 
/*
   MatMissingDiagonal_MPIDense - A dense matrix stores every entry, so a
   diagonal entry is never missing; *missing is always FALSE and the
   index output d is deliberately left untouched.
*/
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1349 
1350 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1351 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1352 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1353 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1354 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
1355 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);
1356 
1357 /* -------------------------------------------------------------------*/
1358 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1359                                         MatGetRow_MPIDense,
1360                                         MatRestoreRow_MPIDense,
1361                                         MatMult_MPIDense,
1362                                 /*  4*/ MatMultAdd_MPIDense,
1363                                         MatMultTranspose_MPIDense,
1364                                         MatMultTransposeAdd_MPIDense,
1365                                         NULL,
1366                                         NULL,
1367                                         NULL,
1368                                 /* 10*/ NULL,
1369                                         NULL,
1370                                         NULL,
1371                                         NULL,
1372                                         MatTranspose_MPIDense,
1373                                 /* 15*/ MatGetInfo_MPIDense,
1374                                         MatEqual_MPIDense,
1375                                         MatGetDiagonal_MPIDense,
1376                                         MatDiagonalScale_MPIDense,
1377                                         MatNorm_MPIDense,
1378                                 /* 20*/ MatAssemblyBegin_MPIDense,
1379                                         MatAssemblyEnd_MPIDense,
1380                                         MatSetOption_MPIDense,
1381                                         MatZeroEntries_MPIDense,
1382                                 /* 24*/ MatZeroRows_MPIDense,
1383                                         NULL,
1384                                         NULL,
1385                                         NULL,
1386                                         NULL,
1387                                 /* 29*/ MatSetUp_MPIDense,
1388                                         NULL,
1389                                         NULL,
1390                                         MatGetDiagonalBlock_MPIDense,
1391                                         NULL,
1392                                 /* 34*/ MatDuplicate_MPIDense,
1393                                         NULL,
1394                                         NULL,
1395                                         NULL,
1396                                         NULL,
1397                                 /* 39*/ MatAXPY_MPIDense,
1398                                         MatCreateSubMatrices_MPIDense,
1399                                         NULL,
1400                                         MatGetValues_MPIDense,
1401                                         MatCopy_MPIDense,
1402                                 /* 44*/ NULL,
1403                                         MatScale_MPIDense,
1404                                         MatShift_Basic,
1405                                         NULL,
1406                                         NULL,
1407                                 /* 49*/ MatSetRandom_MPIDense,
1408                                         NULL,
1409                                         NULL,
1410                                         NULL,
1411                                         NULL,
1412                                 /* 54*/ NULL,
1413                                         NULL,
1414                                         NULL,
1415                                         NULL,
1416                                         NULL,
1417                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1418                                         MatDestroy_MPIDense,
1419                                         MatView_MPIDense,
1420                                         NULL,
1421                                         NULL,
1422                                 /* 64*/ NULL,
1423                                         NULL,
1424                                         NULL,
1425                                         NULL,
1426                                         NULL,
1427                                 /* 69*/ NULL,
1428                                         NULL,
1429                                         NULL,
1430                                         NULL,
1431                                         NULL,
1432                                 /* 74*/ NULL,
1433                                         NULL,
1434                                         NULL,
1435                                         NULL,
1436                                         NULL,
1437                                 /* 79*/ NULL,
1438                                         NULL,
1439                                         NULL,
1440                                         NULL,
1441                                 /* 83*/ MatLoad_MPIDense,
1442                                         NULL,
1443                                         NULL,
1444                                         NULL,
1445                                         NULL,
1446                                         NULL,
1447                                 /* 89*/ NULL,
1448                                         NULL,
1449                                         NULL,
1450                                         NULL,
1451                                         NULL,
1452                                 /* 94*/ NULL,
1453                                         NULL,
1454                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1455                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1456                                         NULL,
1457                                 /* 99*/ MatProductSetFromOptions_MPIDense,
1458                                         NULL,
1459                                         NULL,
1460                                         MatConjugate_MPIDense,
1461                                         NULL,
1462                                 /*104*/ NULL,
1463                                         MatRealPart_MPIDense,
1464                                         MatImaginaryPart_MPIDense,
1465                                         NULL,
1466                                         NULL,
1467                                 /*109*/ NULL,
1468                                         NULL,
1469                                         NULL,
1470                                         MatGetColumnVector_MPIDense,
1471                                         MatMissingDiagonal_MPIDense,
1472                                 /*114*/ NULL,
1473                                         NULL,
1474                                         NULL,
1475                                         NULL,
1476                                         NULL,
1477                                 /*119*/ NULL,
1478                                         NULL,
1479                                         NULL,
1480                                         NULL,
1481                                         NULL,
1482                                 /*124*/ NULL,
1483                                         MatGetColumnNorms_MPIDense,
1484                                         NULL,
1485                                         NULL,
1486                                         NULL,
1487                                 /*129*/ NULL,
1488                                         NULL,
1489                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1490                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1491                                         NULL,
1492                                 /*134*/ NULL,
1493                                         NULL,
1494                                         NULL,
1495                                         NULL,
1496                                         NULL,
1497                                 /*139*/ NULL,
1498                                         NULL,
1499                                         NULL,
1500                                         NULL,
1501                                         NULL,
1502                                         MatCreateMPIMatConcatenateSeqMat_MPIDense,
1503                                 /*145*/ NULL,
1504                                         NULL,
1505                                         NULL
1506 };
1507 
/* Preallocates the storage of an MPI dense matrix by creating/preallocating the inner
   sequential matrix that holds this process's rows. The optional user array 'data'
   (column major) is passed through to MatSeqDenseSetPreallocation(). */
PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* preallocation is illegal while a submatrix from MatDenseGetSubMatrix() is outstanding */
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  if (!a->A) {
    /* the inner matrix stores the local rows and ALL global columns (mat->cmap->N) */
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
    ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  }
  /* inner type mirrors the outer type: SEQDENSECUDA for MPIDENSECUDA, SEQDENSE otherwise */
  ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  mat->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1529 
1530 #if defined(PETSC_HAVE_ELEMENTAL)
/* Converts an MPI dense matrix to Elemental format by adding each process's local
   rows (all N columns) into the new matrix with ADD_VALUES. */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    /* wipe old values: the entries below are accumulated, not inserted */
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    /* the dense array handed to MatSetValues() below is column oriented */
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  /* index arrays: all global columns, and this process's contiguous row range */
  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    /* replace A's guts with the Elemental matrix, destroying the old representation */
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
1569 #endif
1570 
1571 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1572 {
1573   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1574   PetscErrorCode ierr;
1575 
1576   PetscFunctionBegin;
1577   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1578   PetscFunctionReturn(0);
1579 }
1580 
1581 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1582 {
1583   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1584   PetscErrorCode ierr;
1585 
1586   PetscFunctionBegin;
1587   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1588   PetscFunctionReturn(0);
1589 }
1590 
1591 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1592 {
1593   PetscErrorCode ierr;
1594   Mat_MPIDense   *mat;
1595   PetscInt       m,nloc,N;
1596 
1597   PetscFunctionBegin;
1598   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
1599   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
1600   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1601     PetscInt sum;
1602 
1603     if (n == PETSC_DECIDE) {
1604       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1605     }
1606     /* Check sum(n) = N */
1607     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1608     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1609 
1610     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
1611   }
1612 
1613   /* numeric phase */
1614   mat = (Mat_MPIDense*)(*outmat)->data;
1615   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1616   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1617   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1618   PetscFunctionReturn(0);
1619 }
1620 
1621 #if defined(PETSC_HAVE_CUDA)
/* Converts a MATMPIDENSECUDA matrix back to host MATMPIDENSE: forces data onto the
   CPU, restores the standard vector type, and unregisters all CUDA-specific methods. */
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* for MAT_INPLACE_MATRIX, *newmat aliases M and M itself is converted below */

  B    = *newmat;
  /* pull any device data to the host before stripping the CUDA machinery */
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
  /* composing NULL removes the CUDA-only methods registered by the reverse conversion */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* convert the inner sequential matrix as well, then rebuild the scatter context */
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}
1663 
/* Converts a host MATMPIDENSE matrix to MATMPIDENSECUDA: switches the default vector
   type to CUDA, registers all CUDA-specific methods, and converts the inner
   sequential matrix to MATSEQDENSECUDA. */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* for MAT_INPLACE_MATRIX, *newmat aliases M and M itself is converted below */

  B    = *newmat;
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
    /* host data was just copied to the device, so both copies are valid */
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  /* install the GPU operation table */
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
1708 #endif
1709 
/* Returns a distributed vector that aliases column 'col' of the matrix (read/write
   access). Must be matched by MatDenseRestoreColumnVec_MPIDense(); only one column
   vector or submatrix may be outstanding at a time. */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the work vector with no array; the column pointer is placed below */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* store col+1 so that 0 means "not in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  /* size_t arithmetic avoids PetscInt overflow in col*lda for large matrices */
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1729 
1730 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1731 {
1732   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1733   PetscErrorCode ierr;
1734 
1735   PetscFunctionBegin;
1736   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1737   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1738   a->vecinuse = 0;
1739   ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1740   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1741   *v   = NULL;
1742   PetscFunctionReturn(0);
1743 }
1744 
/* Returns a distributed, read-locked vector that aliases column 'col' of the matrix.
   Must be matched by MatDenseRestoreColumnVecRead_MPIDense(). */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the work vector with no array; the column pointer is placed below */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* store col+1 so that 0 means "not in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  /* size_t arithmetic avoids PetscInt overflow in col*lda for large matrices */
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  /* lock the vector so callers cannot write through a read-only alias */
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1765 
1766 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1767 {
1768   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1769   PetscErrorCode ierr;
1770 
1771   PetscFunctionBegin;
1772   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1773   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1774   a->vecinuse = 0;
1775   ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1776   ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
1777   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1778   *v   = NULL;
1779   PetscFunctionReturn(0);
1780 }
1781 
/* Returns a distributed, write-only vector that aliases column 'col' of the matrix
   (previous contents need not be valid). Must be matched by
   MatDenseRestoreColumnVecWrite_MPIDense(). */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the work vector with no array; the column pointer is placed below */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* store col+1 so that 0 means "not in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  /* size_t arithmetic avoids PetscInt overflow in col*lda for large matrices */
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1801 
1802 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1803 {
1804   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1805   PetscErrorCode ierr;
1806 
1807   PetscFunctionBegin;
1808   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1809   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1810   a->vecinuse = 0;
1811   ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1812   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1813   *v   = NULL;
1814   PetscFunctionReturn(0);
1815 }
1816 
/* Returns a matrix that aliases the column range [cbegin,cend) of A, sharing the
   row layout of A. The wrapper matrix a->cmat is created lazily and reused; its
   column layout is rebuilt when the requested width changes. Must be matched by
   MatDenseRestoreSubMatrix_MPIDense(). */
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscBool      setup = PETSC_FALSE;  /* whether the scatter context must be (re)built */

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (a->vecinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cmat) {
    /* first call: create the wrapper matrix, sharing A's row layout */
    setup = PETSC_TRUE;
    ierr = MatCreate(comm,&a->cmat);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr);
    ierr = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  } else if (cend-cbegin != a->cmat->cmap->N) {
    /* requested width differs from the cached one: rebuild the column layout */
    setup = PETSC_TRUE;
    ierr = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  }
  c = (Mat_MPIDense*)a->cmat->data;
  /* c->A non-NULL means a previous submatrix was never restored */
  if (c->A) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr);
  if (setup) { /* do we really need this? */
    ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr);
  }
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled = PETSC_TRUE;
  a->matinuse = cbegin + 1;  /* store cbegin+1 so that 0 means "not in use" */
  *v = a->cmat;
  PetscFunctionReturn(0);
}
1856 
/* Releases the submatrix obtained with MatDenseGetSubMatrix_MPIDense(): restores the
   inner sequential submatrix and clears the in-use flag. Sets *v to NULL so the
   caller cannot keep using the alias. */
PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
  if (!a->cmat) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
  /* guard against restoring a matrix other than the one handed out */
  if (*v != a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c    = (Mat_MPIDense*)a->cmat->data;
  ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1873 
1874 /*MC
1875    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1876 
1877    Options Database Keys:
1878 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1879 
1880   Level: beginner
1881 
1882 .seealso: MatCreateDense()
1883 
1884 M*/
/* Type constructor for MATMPIDENSE: allocates the type-specific data, installs the
   operation table, creates the stash used for off-process MatSetValues(), and
   registers all composed methods (dense array access, column vectors, submatrices,
   conversions, and matrix products). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* dense array accessors */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
  /* column vector and submatrix aliasing */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr);
  /* conversions to external packages, compiled in only when available */
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  /* matrix products with AIJ matrices */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#endif

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1948 
1949 /*MC
1950    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1951 
1952    Options Database Keys:
1953 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1954 
1955   Level: beginner
1956 
.seealso: MATMPIDENSE, MATSEQDENSECUDA, MATDENSECUDA
1958 
1959 M*/
1960 #if defined(PETSC_HAVE_CUDA)
/* Type constructor for MATMPIDENSECUDA: checks CUDA is initialized, builds a host
   MPIDENSE matrix, then converts it in place to the CUDA type. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscCUDAInitializeCheck();CHKERRQ(ierr);
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1971 #endif
1972 
1973 /*MC
1974    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1975 
1976    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1977    and MATMPIDENSE otherwise.
1978 
1979    Options Database Keys:
1980 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1981 
1982   Level: beginner
1983 
1984 
1985 .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
1986 M*/
1987 
1988 /*MC
1989    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
1990 
1991    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
1992    and MATMPIDENSECUDA otherwise.
1993 
1994    Options Database Keys:
1995 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
1996 
1997   Level: beginner
1998 
1999 .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
2000 M*/
2001 
2002 /*@C
2003    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
2004 
2005    Collective
2006 
2007    Input Parameters:
+  B - the matrix
2009 -  data - optional location of matrix data.  Set data=NULL for PETSc
2010    to control all matrix memory allocation.
2011 
2012    Notes:
2013    The dense format is fully compatible with standard Fortran 77
2014    storage by columns.
2015 
2016    The data input variable is intended primarily for Fortran programmers
2017    who wish to allocate their own matrix memory space.  Most users should
2018    set data=NULL.
2019 
2020    Level: intermediate
2021 
2022 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
2023 @*/
PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  /* PetscTryMethod: dispatch to the type-specific implementation if the type
     registered one (no-op for types without "MatMPIDenseSetPreallocation_C") */
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2033 
2034 /*@
2035    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
2036    array provided by the user. This is useful to avoid copying an array
2037    into a matrix
2038 
2039    Not Collective
2040 
2041    Input Parameters:
2042 +  mat - the matrix
2043 -  array - the array in column major order
2044 
2045    Notes:
2046    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
2047    freed when the matrix is destroyed.
2048 
2049    Level: developer
2050 
2051 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
2052 
2053 @*/
PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* PetscUseMethod: error if the type did not register "MatDensePlaceArray_C" */
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  /* the matrix values changed, so bump the object state */
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the placed array is host memory, so the CPU copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
2067 
2068 /*@
2069    MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()
2070 
2071    Not Collective
2072 
2073    Input Parameters:
2074 .  mat - the matrix
2075 
2076    Notes:
2077    You can only call this after a call to MatDensePlaceArray()
2078 
2079    Level: developer
2080 
2081 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
2082 
2083 @*/
2084 PetscErrorCode  MatDenseResetArray(Mat mat)
2085 {
2086   PetscErrorCode ierr;
2087 
2088   PetscFunctionBegin;
2089   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2090   ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
2091   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2092   PetscFunctionReturn(0);
2093 }
2094 
2095 /*@
2096    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2097    array provided by the user. This is useful to avoid copying an array
2098    into a matrix
2099 
2100    Not Collective
2101 
2102    Input Parameters:
2103 +  mat - the matrix
2104 -  array - the array in column major order
2105 
2106    Notes:
2107    The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
2108    freed by the user. It will be freed when the matrix is destroyed.
2109 
2110    Level: developer
2111 
2112 .seealso: MatDenseGetArray(), VecReplaceArray()
2113 @*/
2114 PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
2115 {
2116   PetscErrorCode ierr;
2117 
2118   PetscFunctionBegin;
2119   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2120   ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2121   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2122 #if defined(PETSC_HAVE_CUDA)
2123   mat->offloadmask = PETSC_OFFLOAD_CPU;
2124 #endif
2125   PetscFunctionReturn(0);
2126 }
2127 
2128 #if defined(PETSC_HAVE_CUDA)
2129 /*@C
2130    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
2131    array provided by the user. This is useful to avoid copying an array
2132    into a matrix
2133 
2134    Not Collective
2135 
2136    Input Parameters:
2137 +  mat - the matrix
2138 -  array - the array in column major order
2139 
2140    Notes:
2141    You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
2142    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
2143 
2144    Level: developer
2145 
2146 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
2147 @*/
2148 PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
2149 {
2150   PetscErrorCode ierr;
2151 
2152   PetscFunctionBegin;
2153   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2154   ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2155   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2156   mat->offloadmask = PETSC_OFFLOAD_GPU;
2157   PetscFunctionReturn(0);
2158 }
2159 
2160 /*@C
2161    MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray()
2162 
2163    Not Collective
2164 
2165    Input Parameters:
2166 .  mat - the matrix
2167 
2168    Notes:
2169    You can only call this after a call to MatDenseCUDAPlaceArray()
2170 
2171    Level: developer
2172 
2173 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()
2174 
2175 @*/
2176 PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
2177 {
2178   PetscErrorCode ierr;
2179 
2180   PetscFunctionBegin;
2181   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2182   ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
2183   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2184   PetscFunctionReturn(0);
2185 }
2186 
2187 /*@C
2188    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
2189    array provided by the user. This is useful to avoid copying an array
2190    into a matrix
2191 
2192    Not Collective
2193 
2194    Input Parameters:
2195 +  mat - the matrix
2196 -  array - the array in column major order
2197 
2198    Notes:
2199    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2200    The memory passed in CANNOT be freed by the user. It will be freed
2201    when the matrix is destroyed. The array should respect the matrix leading dimension.
2202 
2203    Level: developer
2204 
2205 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
2206 @*/
2207 PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
2208 {
2209   PetscErrorCode ierr;
2210 
2211   PetscFunctionBegin;
2212   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2213   ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2214   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2215   mat->offloadmask = PETSC_OFFLOAD_GPU;
2216   PetscFunctionReturn(0);
2217 }
2218 
2219 /*@C
2220    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.
2221 
2222    Not Collective
2223 
2224    Input Parameters:
2225 .  A - the matrix
2226 
2227    Output Parameters
2228 .  array - the GPU array in column major order
2229 
2230    Notes:
2231    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
2232 
2233    Level: developer
2234 
2235 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
2236 @*/
2237 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
2238 {
2239   PetscErrorCode ierr;
2240 
2241   PetscFunctionBegin;
2242   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2243   ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2244   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2245   PetscFunctionReturn(0);
2246 }
2247 
2248 /*@C
2249    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().
2250 
2251    Not Collective
2252 
2253    Input Parameters:
2254 +  A - the matrix
2255 -  array - the GPU array in column major order
2256 
2257    Notes:
2258 
2259    Level: developer
2260 
2261 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2262 @*/
2263 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
2264 {
2265   PetscErrorCode ierr;
2266 
2267   PetscFunctionBegin;
2268   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2269   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2270   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2271   A->offloadmask = PETSC_OFFLOAD_GPU;
2272   PetscFunctionReturn(0);
2273 }
2274 
2275 /*@C
2276    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.
2277 
2278    Not Collective
2279 
2280    Input Parameters:
2281 .  A - the matrix
2282 
2283    Output Parameters
2284 .  array - the GPU array in column major order
2285 
2286    Notes:
2287    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2288 
2289    Level: developer
2290 
2291 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2292 @*/
2293 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
2294 {
2295   PetscErrorCode ierr;
2296 
2297   PetscFunctionBegin;
2298   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2299   ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2300   PetscFunctionReturn(0);
2301 }
2302 
2303 /*@C
2304    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2305 
2306    Not Collective
2307 
2308    Input Parameters:
2309 +  A - the matrix
2310 -  array - the GPU array in column major order
2311 
2312    Notes:
2313    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2314 
2315    Level: developer
2316 
2317 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
2318 @*/
2319 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2320 {
2321   PetscErrorCode ierr;
2322 
2323   PetscFunctionBegin;
2324   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2325   PetscFunctionReturn(0);
2326 }
2327 
2328 /*@C
2329    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.
2330 
2331    Not Collective
2332 
2333    Input Parameters:
2334 .  A - the matrix
2335 
2336    Output Parameters
2337 .  array - the GPU array in column major order
2338 
2339    Notes:
2340    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().
2341 
2342    Level: developer
2343 
2344 .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2345 @*/
2346 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
2347 {
2348   PetscErrorCode ierr;
2349 
2350   PetscFunctionBegin;
2351   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2352   ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2353   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2354   PetscFunctionReturn(0);
2355 }
2356 
2357 /*@C
2358    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().
2359 
2360    Not Collective
2361 
2362    Input Parameters:
2363 +  A - the matrix
2364 -  array - the GPU array in column major order
2365 
2366    Notes:
2367 
2368    Level: developer
2369 
2370 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2371 @*/
2372 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
2373 {
2374   PetscErrorCode ierr;
2375 
2376   PetscFunctionBegin;
2377   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2378   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2379   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2380   A->offloadmask = PETSC_OFFLOAD_GPU;
2381   PetscFunctionReturn(0);
2382 }
2383 #endif
2384 
2385 /*@C
2386    MatCreateDense - Creates a matrix in dense format.
2387 
2388    Collective
2389 
2390    Input Parameters:
2391 +  comm - MPI communicator
2392 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2393 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2394 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2395 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2396 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2397    to control all matrix memory allocation.
2398 
2399    Output Parameter:
2400 .  A - the matrix
2401 
2402    Notes:
2403    The dense format is fully compatible with standard Fortran 77
2404    storage by columns.
2405    Note that, although local portions of the matrix are stored in column-major
2406    order, the matrix is partitioned across MPI ranks by row.
2407 
2408    The data input variable is intended primarily for Fortran programmers
2409    who wish to allocate their own matrix memory space.  Most users should
2410    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
2411 
2412    The user MUST specify either the local or global matrix dimensions
2413    (possibly both).
2414 
2415    Level: intermediate
2416 
2417 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
2418 @*/
2419 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2420 {
2421   PetscErrorCode ierr;
2422   PetscMPIInt    size;
2423 
2424   PetscFunctionBegin;
2425   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2426   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2427   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
2428   if (size > 1) {
2429     PetscBool havedata = (PetscBool)!!data;
2430 
2431     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
2432     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
2433     ierr = MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr);
2434     if (havedata) {  /* user provided data array, so no need to assemble */
2435       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
2436       (*A)->assembled = PETSC_TRUE;
2437     }
2438   } else {
2439     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
2440     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
2441   }
2442   PetscFunctionReturn(0);
2443 }
2444 
2445 #if defined(PETSC_HAVE_CUDA)
2446 /*@C
2447    MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.
2448 
2449    Collective
2450 
2451    Input Parameters:
2452 +  comm - MPI communicator
2453 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2454 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2455 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2456 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2457 -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
2458    to control matrix memory allocation.
2459 
2460    Output Parameter:
2461 .  A - the matrix
2462 
2463    Notes:
2464 
2465    Level: intermediate
2466 
2467 .seealso: MatCreate(), MatCreateDense()
2468 @*/
2469 PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2470 {
2471   PetscErrorCode ierr;
2472   PetscMPIInt    size;
2473 
2474   PetscFunctionBegin;
2475   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2476   PetscValidLogicalCollectiveBool(*A,!!data,6);
2477   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2478   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
2479   if (size > 1) {
2480     ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
2481     ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
2482     if (data) {  /* user provided data array, so no need to assemble */
2483       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
2484       (*A)->assembled = PETSC_TRUE;
2485     }
2486   } else {
2487     ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
2488     ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
2489   }
2490   PetscFunctionReturn(0);
2491 }
2492 #endif
2493 
/* Duplicate an MPI dense matrix: create a new matrix of the same type/layout,
   copy the metadata flags, reference the row/column layouts, and duplicate the
   local sequential block (copying values or not according to cpvalues). */
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = NULL;
  ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  /* MatSetType allocated the type-specific data; grab it to fill in below */
  a       = (Mat_MPIDense*)mat->data;

  /* the duplicate is immediately usable: mark it assembled and preallocated */
  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* share (reference-count) the layouts rather than rebuilding them */
  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  /* duplicate the local dense block; cpvalues controls whether values are copied */
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}
2524 
/* Load an MPI dense matrix from a viewer; binary viewers are always supported,
   HDF5 viewers only when PETSc was configured with HDF5. */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}
2551 
2552 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
2553 {
2554   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2555   Mat            a,b;
2556   PetscBool      flg;
2557   PetscErrorCode ierr;
2558 
2559   PetscFunctionBegin;
2560   a    = matA->A;
2561   b    = matB->A;
2562   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2563   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2564   PetscFunctionReturn(0);
2565 }
2566 
2567 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2568 {
2569   PetscErrorCode        ierr;
2570   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2571 
2572   PetscFunctionBegin;
2573   ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
2574   ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
2575   ierr = PetscFree(atb);CHKERRQ(ierr);
2576   PetscFunctionReturn(0);
2577 }
2578 
2579 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2580 {
2581   PetscErrorCode        ierr;
2582   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2583 
2584   PetscFunctionBegin;
2585   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
2586   ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
2587   ierr = PetscFree(abt);CHKERRQ(ierr);
2588   PetscFunctionReturn(0);
2589 }
2590 
/* Numeric phase of C = A^T * B for MPI dense matrices: each rank forms the
   (global-sized) local contribution aseq^T * bseq, packs it by destination rank,
   and an MPI_Reduce_scatter sums and distributes the row blocks of C. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts;
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  /* buffers and counts were set up in the symbolic phase */
  atb = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf = atb->sendbuf;

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);

  /* arrange atbarray into sendbuf */
  /* pack by destination rank: for each rank, all cN columns of its row block,
     column-major within the block (atbarray is cM x cN, column-major) */
  ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);

  /* sum all atbarray to local values of C */
  ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRMPI(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2635 
/* Symbolic phase of C = A^T * B for MPI dense matrices: size and type C, and
   allocate the reduce-scatter send buffer and per-rank receive counts reused by
   the numeric phase. */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool             cisdense;
  PetscInt              i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* A and B must share the same row distribution for aseq^T * bseq to make sense */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product C */
  ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
  if (!cisdense) {
    ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
  }
  ierr = MatSetUp(C);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = C->rmap->N;
  /* sendbuf holds the full cM x cN local contribution; size_t casts avoid PetscInt overflow in the product */
  ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  /* each rank receives its row block of C times all cN columns */
  for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2675 
/* Symbolic phase of C = A * B^T for MPI dense matrices: size and type C, pick the
   algorithm ("allgatherv" or "cyclic") from the product options, and allocate the
   communication buffers the chosen numeric routine will reuse. */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A and B */
  if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);

  /* alg 0 = "allgatherv", anything else = "cyclic" */
  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  alg  = flg ? 0 : 1;

  /* setup matrix product C */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  /* private tag so the cyclic algorithm's messages cannot collide with other traffic */
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    /* double buffers sized for the largest row block of B circulating around the ring */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" */
    /* buf[0] holds the local transpose of B, buf[1] the gathered global transpose */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data    = abt;
  C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2728 
/* Numeric phase of C = A * B^T using a ring (cyclic) pipeline: each rank's packed
   row block of B circulates around the communicator while the local GEMM for the
   block currently held overlaps with the next send/receive. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=NULL, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda = 0, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  = (Mat_MatTransMultDense*)C->product->data;
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
  /* leading dimensions of the local blocks, as BLAS integers */
  ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  if (blda == bn) {
    /* local block of B is contiguous: send it directly without packing */
    sendbuf = (PetscScalar*)bv;
  } else {
    /* pack the bn x cK local block of B densely into buf[0] */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  /* ring neighbors: send "down", receive from "up" */
  if (size > 1) {
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRMPI(ierr);
      ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRMPI(ierr);
    }

    /* local aseq * sendbuf^T */
    /* writes the column block of C owned (in C's column layout) by rank recvisfrom */
    ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2814 
2815 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2816 {
2817   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2818   Mat_MatTransMultDense *abt;
2819   PetscErrorCode        ierr;
2820   MPI_Comm              comm;
2821   PetscMPIInt           size;
2822   PetscScalar           *cv, *sendbuf, *recvbuf;
2823   const PetscScalar     *av,*bv;
2824   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2825   PetscScalar           _DOne=1.0,_DZero=0.0;
2826   PetscBLASInt          cm, cn, ck, alda, clda;
2827 
2828   PetscFunctionBegin;
2829   MatCheckProduct(C,3);
2830   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2831   abt  = (Mat_MatTransMultDense*)C->product->data;
2832   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2833   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
2834   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
2835   ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
2836   ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
2837   ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
2838   ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
2839   ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr);
2840   ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
2841   ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
2842   /* copy transpose of B into buf[0] */
2843   bn      = B->rmap->n;
2844   sendbuf = abt->buf[0];
2845   recvbuf = abt->buf[1];
2846   for (k = 0, j = 0; j < bn; j++) {
2847     for (i = 0; i < cK; i++, k++) {
2848       sendbuf[k] = bv[i * blda + j];
2849     }
2850   }
2851   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2852   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRMPI(ierr);
2853   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2854   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2855   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
2856   if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2857   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
2858   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2859   ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
2860   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2861   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2862   PetscFunctionReturn(0);
2863 }
2864 
2865 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2866 {
2867   Mat_MatTransMultDense *abt;
2868   PetscErrorCode        ierr;
2869 
2870   PetscFunctionBegin;
2871   MatCheckProduct(C,3);
2872   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2873   abt = (Mat_MatTransMultDense*)C->product->data;
2874   switch (abt->alg) {
2875   case 1:
2876     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2877     break;
2878   default:
2879     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2880     break;
2881   }
2882   PetscFunctionReturn(0);
2883 }
2884 
2885 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2886 {
2887   PetscErrorCode   ierr;
2888   Mat_MatMultDense *ab = (Mat_MatMultDense*)data;
2889 
2890   PetscFunctionBegin;
2891   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2892   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2893   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2894   ierr = PetscFree(ab);CHKERRQ(ierr);
2895   PetscFunctionReturn(0);
2896 }
2897 
2898 #if defined(PETSC_HAVE_ELEMENTAL)
/* Numeric phase of C = A*B for MPIDENSE matrices via Elemental.
   The Elemental copies Ae, Be, Ce were created at symbolic time and are
   reused here; the call order (convert A, convert B, multiply, convert the
   result back into C) is fixed and must not be rearranged. */
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode   ierr;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
  ab   = (Mat_MatMultDense*)C->product->data;
  /* refresh the cached Elemental copies with the current numerical values */
  ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
  /* copy the Elemental result back into the caller's MPIDENSE matrix C */
  ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2914 
2915 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2916 {
2917   PetscErrorCode   ierr;
2918   Mat              Ae,Be,Ce;
2919   Mat_MatMultDense *ab;
2920 
2921   PetscFunctionBegin;
2922   MatCheckProduct(C,4);
2923   if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2924   /* check local size of A and B */
2925   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2926     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2927   }
2928 
2929   /* create elemental matrices Ae and Be */
2930   ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr);
2931   ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2932   ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr);
2933   ierr = MatSetUp(Ae);CHKERRQ(ierr);
2934   ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2935 
2936   ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr);
2937   ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
2938   ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
2939   ierr = MatSetUp(Be);CHKERRQ(ierr);
2940   ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2941 
2942   /* compute symbolic Ce = Ae*Be */
2943   ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr);
2944   ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr);
2945 
2946   /* setup C */
2947   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2948   ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr);
2949   ierr = MatSetUp(C);CHKERRQ(ierr);
2950 
2951   /* create data structure for reuse Cdense */
2952   ierr = PetscNew(&ab);CHKERRQ(ierr);
2953   ab->Ae = Ae;
2954   ab->Be = Be;
2955   ab->Ce = Ce;
2956 
2957   C->product->data    = ab;
2958   C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
2959   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2960   PetscFunctionReturn(0);
2961 }
2962 #endif
2963 /* ----------------------------------------------- */
2964 #if defined(PETSC_HAVE_ELEMENTAL)
2965 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2966 {
2967   PetscFunctionBegin;
2968   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2969   C->ops->productsymbolic = MatProductSymbolic_AB;
2970   PetscFunctionReturn(0);
2971 }
2972 #endif
2973 
2974 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2975 {
2976   Mat_Product *product = C->product;
2977   Mat         A = product->A,B=product->B;
2978 
2979   PetscFunctionBegin;
2980   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2981     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2982   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2983   C->ops->productsymbolic = MatProductSymbolic_AtB;
2984   PetscFunctionReturn(0);
2985 }
2986 
/* Configure an A*B^T product of two MPIDENSE matrices: pick the algorithm
   ("allgatherv" or "cyclic"), honoring a runtime option, and install the
   symbolic callbacks.  The PetscOptionsBegin/End pairs are statement macros
   and must remain matched as written. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;
  const char     *algTypes[2] = {"allgatherv","cyclic"};
  PetscInt       alg,nalg = 2;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  /* if the caller left the algorithm at "default", record the concrete default name */
  ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  /* Get runtime option; the option name differs depending on whether the
     product was requested through the user-level MatMatTransposeMult API
     or through the MatProduct interface */
  if (product->api_user) {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  } else {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  }
  /* flg is set only when the user supplied the option; record the choice */
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}
3021 
3022 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
3023 {
3024   PetscErrorCode ierr;
3025   Mat_Product    *product = C->product;
3026 
3027   PetscFunctionBegin;
3028   switch (product->type) {
3029 #if defined(PETSC_HAVE_ELEMENTAL)
3030   case MATPRODUCT_AB:
3031     ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
3032     break;
3033 #endif
3034   case MATPRODUCT_AtB:
3035     ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
3036     break;
3037   case MATPRODUCT_ABt:
3038     ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
3039     break;
3040   default:
3041     break;
3042   }
3043   PetscFunctionReturn(0);
3044 }
3045