xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision f210f596a55b992253e3045dd28ed212964dcd44)
1 
/*
   Basic operations for parallel (MPI) dense matrices.
*/
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11 
12       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
13               matrix that represents the operator. For sequential matrices it returns itself.
14 
15     Input Parameter:
16 .      A - the Seq or MPI dense matrix
17 
18     Output Parameter:
19 .      B - the inner matrix
20 
21     Level: intermediate
22 
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25 {
26   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
27   PetscErrorCode ierr;
28   PetscBool      flg;
29 
30   PetscFunctionBegin;
31   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
32   PetscValidPointer(B,2);
33   ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
34   if (flg) *B = mat->A;
35   else {
36     ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg);CHKERRQ(ierr);
37     PetscCheckFalse(!flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
38     *B = A;
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
44 {
45   Mat_MPIDense   *Amat = (Mat_MPIDense*)A->data;
46   Mat_MPIDense   *Bmat = (Mat_MPIDense*)B->data;
47   PetscErrorCode ierr;
48 
49   PetscFunctionBegin;
50   ierr = MatCopy(Amat->A,Bmat->A,s);CHKERRQ(ierr);
51   PetscFunctionReturn(0);
52 }
53 
54 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
55 {
56   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
57   PetscErrorCode ierr;
58   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
59 
60   PetscFunctionBegin;
61   PetscCheckFalse(row < rstart || row >= rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
62   lrow = row - rstart;
63   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
64   PetscFunctionReturn(0);
65 }
66 
67 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
68 {
69   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
70   PetscErrorCode ierr;
71   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
72 
73   PetscFunctionBegin;
74   PetscCheckFalse(row < rstart || row >= rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
75   lrow = row - rstart;
76   ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
77   PetscFunctionReturn(0);
78 }
79 
/* Return the square diagonal block of A owned by this rank as a sequential dense
   matrix that shares storage with A's local block. The block is created once,
   cached on A via PetscObjectCompose, and a borrowed reference is returned. */
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr);
  PetscCheckFalse(!flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  /* reuse the cached block if one was built on an earlier call */
  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr);
    PetscCheckFalse(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    /* point B at the diagonal-block columns of the local array (no copy);
       assumes the local block's leading dimension equals m — TODO confirm for user-set LDA */
    ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* cache on A; the compose holds a reference, so the MatDestroy below only
       drops our local reference and the caller receives a borrowed one */
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
113 
/* Insert/add a logically dense block of values. Locally owned rows go straight
   into the local block; off-process rows are stashed (unless disabled) and
   communicated during MatAssemblyBegin/End. */
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative indices are ignored by convention */
    PetscCheckFalse(idxm[i] >= mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* local row index */
      if (roworiented) {
        /* v stored row-by-row: the whole row is contiguous, one call suffices */
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* v stored column-by-column: entries of this row are strided by m */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheckFalse(idxn[j] >= mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      /* off-process row: stash it for the assembly-time scatter */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
147 
148 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
149 {
150   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
151   PetscErrorCode ierr;
152   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
153 
154   PetscFunctionBegin;
155   for (i=0; i<m; i++) {
156     if (idxm[i] < 0) continue; /* negative row */
157     PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
158     if (idxm[i] >= rstart && idxm[i] < rend) {
159       row = idxm[i] - rstart;
160       for (j=0; j<n; j++) {
161         if (idxn[j] < 0) continue; /* negative column */
162         PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
163         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
164       }
165     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
166   }
167   PetscFunctionReturn(0);
168 }
169 
170 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
171 {
172   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
173   PetscErrorCode ierr;
174 
175   PetscFunctionBegin;
176   ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
177   PetscFunctionReturn(0);
178 }
179 
180 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
181 {
182   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
183   PetscBool      iscuda;
184   PetscErrorCode ierr;
185 
186   PetscFunctionBegin;
187   if (!a->A) {
188     PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
189     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
190     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
191     ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
192     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->A);CHKERRQ(ierr);
193     ierr = MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
194     ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
195     ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
196   }
197   ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr);
198   PetscFunctionReturn(0);
199 }
200 
201 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
202 {
203   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
204   PetscErrorCode ierr;
205 
206   PetscFunctionBegin;
207   PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
208   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
209   PetscFunctionReturn(0);
210 }
211 
212 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
213 {
214   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
215   PetscErrorCode ierr;
216 
217   PetscFunctionBegin;
218   PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
219   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
220   PetscFunctionReturn(0);
221 }
222 
223 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
224 {
225   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
226   PetscErrorCode ierr;
227 
228   PetscFunctionBegin;
229   PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
230   ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr);
231   PetscFunctionReturn(0);
232 }
233 
234 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
235 {
236   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
237   PetscErrorCode ierr;
238 
239   PetscFunctionBegin;
240   PetscCheckFalse(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
241   PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
242   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
243   PetscFunctionReturn(0);
244 }
245 
246 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
247 {
248   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
249   PetscErrorCode ierr;
250 
251   PetscFunctionBegin;
252   PetscCheckFalse(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
253   PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
254   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
255   PetscFunctionReturn(0);
256 }
257 
258 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
259 {
260   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
261   PetscErrorCode ierr;
262 
263   PetscFunctionBegin;
264   PetscCheckFalse(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
265   PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
266   ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr);
267   PetscFunctionReturn(0);
268 }
269 
/* Extract the submatrix A(isrow,iscol) into *B. Row indices must be locally
   owned (no redistribution); the column index set is gathered so every rank
   sees all requested columns. The copy is done column by column from the
   column-major local storage. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
  PetscErrorCode    ierr;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  PetscCheckFalse(comm_mat != comm_is,PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  /* every rank needs the full list of requested columns */
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr);
  for (i=0; i<Ncols; i++) {
    /* av points at the start of source column icol[i]; the destination is
       written densely, so the new matrix's columns are contiguous */
    const PetscScalar *av = v + lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }
  ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr);

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
339 
340 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
341 {
342   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
343   PetscErrorCode ierr;
344 
345   PetscFunctionBegin;
346   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
347   PetscFunctionReturn(0);
348 }
349 
350 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
351 {
352   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
353   PetscErrorCode ierr;
354 
355   PetscFunctionBegin;
356   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
357   PetscFunctionReturn(0);
358 }
359 
360 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
361 {
362   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
363   PetscErrorCode ierr;
364 
365   PetscFunctionBegin;
366   ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr);
367   PetscFunctionReturn(0);
368 }
369 
370 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
371 {
372   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
373   PetscErrorCode ierr;
374   PetscInt       nstash,reallocs;
375 
376   PetscFunctionBegin;
377   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
378 
379   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
380   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
381   ierr = PetscInfo(mdn->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
382   PetscFunctionReturn(0);
383 }
384 
/* Finish assembly: drain the stash scatter started in MatAssemblyBegin,
   insert the received values, assemble the local block, and on first final
   assembly build the communication infrastructure for MatMult. */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break; /* no more messages */

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* first final assembly: set up lvec/Mvctx used by the multiply routines */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
423 
424 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
425 {
426   PetscErrorCode ierr;
427   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
428 
429   PetscFunctionBegin;
430   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
431   PetscFunctionReturn(0);
432 }
433 
/* Zero the given global rows, optionally placing diag on the diagonal of each
   zeroed row and fixing the right-hand side b so that b[row] = diag*x[row]. */
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr);
  /* fix right hand side if needed (must happen before the rows are zeroed) */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr);
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr);
  }
  /* zero the rows in the local block without setting a diagonal */
  ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (diag != 0.0) {
    /* place diag on the whole diagonal; valid because the zeroed rows are the
       only rows whose diagonal entry changed to 0 above, the rest are reinserted */
    Vec d;

    ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
    ierr = VecSet(d,diag);CHKERRQ(ierr);
    ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecDestroy(&d);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
466 
467 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
468 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
469 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
470 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
471 
472 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
473 {
474   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
475   PetscErrorCode    ierr;
476   const PetscScalar *ax;
477   PetscScalar       *ay;
478   PetscMemType      axmtype,aymtype;
479 
480   PetscFunctionBegin;
481   ierr = VecGetArrayReadAndMemType(xx,&ax,&axmtype);CHKERRQ(ierr);
482   ierr = VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr);
483   ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE);CHKERRQ(ierr);
484   ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE);CHKERRQ(ierr);
485   ierr = VecRestoreArrayAndMemType(mdn->lvec,&ay);CHKERRQ(ierr);
486   ierr = VecRestoreArrayReadAndMemType(xx,&ax);CHKERRQ(ierr);
487   ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
488   PetscFunctionReturn(0);
489 }
490 
491 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
492 {
493   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
494   PetscErrorCode    ierr;
495   const PetscScalar *ax;
496   PetscScalar       *ay;
497   PetscMemType      axmtype,aymtype;
498 
499   PetscFunctionBegin;
500   ierr = VecGetArrayReadAndMemType(xx,&ax,&axmtype);CHKERRQ(ierr);
501   ierr = VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr);
502   ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE);CHKERRQ(ierr);
503   ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE);CHKERRQ(ierr);
504   ierr = VecRestoreArrayAndMemType(mdn->lvec,&ay);CHKERRQ(ierr);
505   ierr = VecRestoreArrayReadAndMemType(xx,&ax);CHKERRQ(ierr);
506   ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
507   PetscFunctionReturn(0);
508 }
509 
510 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
511 {
512   Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
513   PetscErrorCode    ierr;
514   const PetscScalar *ax;
515   PetscScalar       *ay;
516   PetscMemType      axmtype,aymtype;
517 
518   PetscFunctionBegin;
519   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
520   ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
521   ierr = VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);CHKERRQ(ierr);
522   ierr = VecGetArrayAndMemType(yy,&ay,&aymtype);CHKERRQ(ierr);
523   ierr = PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr);
524   ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
525   ierr = VecRestoreArrayReadAndMemType(a->lvec,&ax);CHKERRQ(ierr);
526   ierr = VecRestoreArrayAndMemType(yy,&ay);CHKERRQ(ierr);
527   PetscFunctionReturn(0);
528 }
529 
530 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
531 {
532   Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
533   PetscErrorCode    ierr;
534   const PetscScalar *ax;
535   PetscScalar       *ay;
536   PetscMemType      axmtype,aymtype;
537 
538   PetscFunctionBegin;
539   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
540   ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
541   ierr = VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);CHKERRQ(ierr);
542   ierr = VecGetArrayAndMemType(zz,&ay,&aymtype);CHKERRQ(ierr);
543   ierr = PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr);
544   ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
545   ierr = VecRestoreArrayReadAndMemType(a->lvec,&ax);CHKERRQ(ierr);
546   ierr = VecRestoreArrayAndMemType(zz,&ay);CHKERRQ(ierr);
547   PetscFunctionReturn(0);
548 }
549 
550 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
551 {
552   Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
553   PetscErrorCode    ierr;
554   PetscInt          lda,len,i,n,m = A->rmap->n,radd;
555   PetscScalar       *x,zero = 0.0;
556   const PetscScalar *av;
557 
558   PetscFunctionBegin;
559   ierr = VecSet(v,zero);CHKERRQ(ierr);
560   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
561   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
562   PetscCheckFalse(n != A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
563   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
564   radd = A->rmap->rstart*m;
565   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
566   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
567   for (i=0; i<len; i++) {
568     x[i] = av[radd + i*lda + i];
569   }
570   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
571   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
572   PetscFunctionReturn(0);
573 }
574 
/* Destroy the MPI dense matrix: free the local block, communication objects,
   cached column vector/submatrix, the data struct, and unregister every
   composed method so a subsequent type change starts clean. */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  /* refuse to destroy while a column-vector or submatrix view is outstanding */
  PetscCheckFalse(mdn->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheckFalse(mdn->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);

  /* clear all composed function pointers registered by MatCreate_MPIDense */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
#if defined (PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
634 
635 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
636 
637 #include <petscdraw.h>
638 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
639 {
640   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
641   PetscErrorCode    ierr;
642   PetscMPIInt       rank;
643   PetscViewerType   vtype;
644   PetscBool         iascii,isdraw;
645   PetscViewer       sviewer;
646   PetscViewerFormat format;
647 
648   PetscFunctionBegin;
649   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
650   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
651   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
652   if (iascii) {
653     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
654     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
655     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
656       MatInfo info;
657       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
658       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
659       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
660       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
661       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
662       ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
663       PetscFunctionReturn(0);
664     } else if (format == PETSC_VIEWER_ASCII_INFO) {
665       PetscFunctionReturn(0);
666     }
667   } else if (isdraw) {
668     PetscDraw draw;
669     PetscBool isnull;
670 
671     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
672     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
673     if (isnull) PetscFunctionReturn(0);
674   }
675 
676   {
677     /* assemble the entire matrix onto first processor. */
678     Mat         A;
679     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
680     PetscInt    *cols;
681     PetscScalar *vals;
682 
683     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
684     if (rank == 0) {
685       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
686     } else {
687       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
688     }
689     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
690     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
691     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
692     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
693 
694     /* Copy the matrix ... This isn't the most efficient means,
695        but it's quick for now */
696     A->insertmode = INSERT_VALUES;
697 
698     row = mat->rmap->rstart;
699     m   = mdn->A->rmap->n;
700     for (i=0; i<m; i++) {
701       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
702       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
703       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
704       row++;
705     }
706 
707     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
708     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
709     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
710     if (rank == 0) {
711       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
712       ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
713     }
714     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
715     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
716     ierr = MatDestroy(&A);CHKERRQ(ierr);
717   }
718   PetscFunctionReturn(0);
719 }
720 
721 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
722 {
723   PetscErrorCode ierr;
724   PetscBool      iascii,isbinary,isdraw,issocket;
725 
726   PetscFunctionBegin;
727   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
728   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
729   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
730   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
731 
732   if (iascii || issocket || isdraw) {
733     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
734   } else if (isbinary) {
735     ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
736   }
737   PetscFunctionReturn(0);
738 }
739 
740 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
741 {
742   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
743   Mat            mdn  = mat->A;
744   PetscErrorCode ierr;
745   PetscLogDouble isend[5],irecv[5];
746 
747   PetscFunctionBegin;
748   info->block_size = 1.0;
749 
750   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
751 
752   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
753   isend[3] = info->memory;  isend[4] = info->mallocs;
754   if (flag == MAT_LOCAL) {
755     info->nz_used      = isend[0];
756     info->nz_allocated = isend[1];
757     info->nz_unneeded  = isend[2];
758     info->memory       = isend[3];
759     info->mallocs      = isend[4];
760   } else if (flag == MAT_GLOBAL_MAX) {
761     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
762 
763     info->nz_used      = irecv[0];
764     info->nz_allocated = irecv[1];
765     info->nz_unneeded  = irecv[2];
766     info->memory       = irecv[3];
767     info->mallocs      = irecv[4];
768   } else if (flag == MAT_GLOBAL_SUM) {
769     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
770 
771     info->nz_used      = irecv[0];
772     info->nz_allocated = irecv[1];
773     info->nz_unneeded  = irecv[2];
774     info->memory       = irecv[3];
775     info->mallocs      = irecv[4];
776   }
777   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
778   info->fill_ratio_needed = 0;
779   info->factor_mallocs    = 0;
780   PetscFunctionReturn(0);
781 }
782 
/* Set an option on the parallel dense matrix.  Options are either recorded in
   the parallel wrapper, forwarded to the local (sequential) block, or
   deliberately ignored because they have no meaning for a dense format. */
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    /* forwarded to the local block */
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    /* recorded locally (used by MatSetValues) and forwarded */
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    /* meaningless for dense storage */
    ierr = PetscInfo(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* when set, MatSetValues drops entries destined for other processes */
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    /* structural hints that this implementation does not exploit */
    ierr = PetscInfo(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}
823 
/* Scale A in place: A <- diag(ll) * A * diag(rr).  Either vector may be NULL,
   in which case the corresponding scaling is skipped.  The work is done on the
   raw column-major array of the local dense block. */
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  const PetscScalar *l;
  PetscScalar       x,*v,*vv,*r;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    PetscCheckFalse(s2a != s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    /* row scaling: entries of row i are lda apart in column-major storage */
    for (i=0; i<m; i++) {
      x = l[i];
      v = vv + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
  }
  if (rr) {
    const PetscScalar *ar;

    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    PetscCheckFalse(s3a != s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    /* the local block spans all n columns, so the distributed rr is first
       gathered into the sequential work vector lvec via the star forest */
    ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr);
    /* column scaling: column i is contiguous, starting at offset i*lda */
    for (i=0; i<n; i++) {
      x = r[i];
      v = vv + i*lda;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
869 
/* Compute a norm of the parallel dense matrix from the raw array of the local
   block.  Supports NORM_FROBENIUS, NORM_1 (max column sum) and NORM_INFINITY
   (max row sum); the true 2-norm is not supported. */
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,j;
  PetscMPIInt       size;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr);
  v    = av;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  if (size == 1) {
    /* uniprocess: the local block is the whole matrix */
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 locally, reduce, then take the square root.
         NOTE(review): walks the array as if fully contiguous — assumes
         lda == local row count; confirm against the block's allocation */
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      /* 1-norm: per-column absolute sums over the local rows, summed across
         processes, then the maximum over all global columns */
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = av;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not split across processes, so the local infinity norm
         followed by a max-reduction gives the global value */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
918 
/* Explicitly form the transpose of A.  For MAT_INITIAL_MATRIX and
   MAT_INPLACE_MATRIX a new matrix with the transposed layout is created; for
   MAT_REUSE_MATRIX the caller-supplied *matout is refilled. */
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i,lda;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else B = *matout;

  m    = a->A->rmap->n; n = a->A->cmap->n;
  ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  /* rwork holds the global indices of the rows owned by this process */
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  /* column j of the local block (contiguous; columns are lda apart) provides
     the entries of row j of B; off-process values are routed by the assembly */
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += lda;
  }
  ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: fold the temporary B back into A */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
956 
957 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
958 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
959 
960 PetscErrorCode MatSetUp_MPIDense(Mat A)
961 {
962   PetscErrorCode ierr;
963 
964   PetscFunctionBegin;
965   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
966   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
967   if (!A->preallocated) {
968     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
969   }
970   PetscFunctionReturn(0);
971 }
972 
973 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
974 {
975   PetscErrorCode ierr;
976   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
977 
978   PetscFunctionBegin;
979   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
980   PetscFunctionReturn(0);
981 }
982 
983 PetscErrorCode MatConjugate_MPIDense(Mat mat)
984 {
985   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
986   PetscErrorCode ierr;
987 
988   PetscFunctionBegin;
989   ierr = MatConjugate(a->A);CHKERRQ(ierr);
990   PetscFunctionReturn(0);
991 }
992 
993 PetscErrorCode MatRealPart_MPIDense(Mat A)
994 {
995   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
996   PetscErrorCode ierr;
997 
998   PetscFunctionBegin;
999   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1004 {
1005   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1006   PetscErrorCode ierr;
1007 
1008   PetscFunctionBegin;
1009   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1010   PetscFunctionReturn(0);
1011 }
1012 
1013 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
1014 {
1015   PetscErrorCode ierr;
1016   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1017 
1018   PetscFunctionBegin;
1019   PetscCheckFalse(!a->A,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
1020   PetscCheckFalse(!a->A->ops->getcolumnvector,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
1021   ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
1022   PetscFunctionReturn(0);
1023 }
1024 
1025 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat,PetscInt,PetscReal*);
1026 
/* Per-column reductions (norms, sums, means) of a parallel dense matrix.
   'type' deliberately mixes NormType and ReductionType values, which is why it
   is declared PetscInt rather than either enum. */
PetscErrorCode MatGetColumnReductions_MPIDense(Mat A,PetscInt type,PetscReal *reductions)
{
  PetscErrorCode ierr;
  PetscInt       i,m,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  /* means are computed as global sums, divided by the global row count m below */
  if (type == REDUCTION_MEAN_REALPART) {
    ierr = MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_REALPART,work);CHKERRQ(ierr);
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    ierr = MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_IMAGINARYPART,work);CHKERRQ(ierr);
  } else {
    ierr = MatGetColumnReductions_SeqDense(a->A,type,work);CHKERRQ(ierr);
  }
  /* the local 2-norms must be squared so they can be summed across processes */
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  /* NOTE(review): uses A->hdr.comm directly; the rest of this file uses
     PetscObjectComm((PetscObject)A) — equivalent for a valid object, but inconsistent */
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRMPI(ierr);
  } else {
    ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRMPI(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    /* undo the squaring now that the squares have been summed globally */
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}
1060 
1061 #if defined(PETSC_HAVE_CUDA)
/* Hand out a parallel CUDA vector aliasing column 'col' of the matrix (read/write
   access).  Only one column vector may be outstanding at a time, tracked via
   vecinuse; it must be returned with MatDenseRestoreColumnVec(). */
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheckFalse(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the reusable wrapper vector; its array is placed per call */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* +1 so column 0 still registers as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  /* point the wrapper at column col (columns are lda entries apart) */
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1082 
/* Return the column vector obtained with MatDenseGetColumnVec: clear the
   in-use flag, release the device array, and detach the wrapper vector.
   NOTE(review): the error checks here use PETSC_COMM_SELF while the sibling
   Read/Write variants use PetscObjectComm((PetscObject)A) — inconsistent. */
static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscCheckFalse(!a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheckFalse(!a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1097 
/* Read-only variant of MatDenseGetColumnVec: same aliasing of column 'col',
   but the device array is acquired read-only and the vector is read-locked. */
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscCheckFalse(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the reusable wrapper vector; its array is placed per call */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* +1 so column 0 still registers as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  /* forbid writes through the handed-out vector */
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1119 
/* Return the read-only column vector: clear the in-use flag, release the
   read-only device array, pop the read lock, and detach the wrapper vector. */
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscCheckFalse(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheckFalse(!a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1135 
/* Write-only variant of MatDenseGetColumnVec: same aliasing of column 'col',
   but the device array is acquired write-only (existing values need not be
   transferred to the device). */
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheckFalse(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the reusable wrapper vector; its array is placed per call */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* +1 so column 0 still registers as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1156 
/* Return the write-only column vector: clear the in-use flag, release the
   write-only device array, and detach the wrapper vector. */
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscCheckFalse(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheckFalse(!a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1171 
1172 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1173 {
1174   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1175   PetscErrorCode ierr;
1176 
1177   PetscFunctionBegin;
1178   PetscCheckFalse(l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1179   PetscCheckFalse(l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1180   ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
1181   PetscFunctionReturn(0);
1182 }
1183 
1184 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1185 {
1186   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1187   PetscErrorCode ierr;
1188 
1189   PetscFunctionBegin;
1190   PetscCheckFalse(l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1191   PetscCheckFalse(l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1192   ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
1193   PetscFunctionReturn(0);
1194 }
1195 
1196 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1197 {
1198   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1199   PetscErrorCode ierr;
1200 
1201   PetscFunctionBegin;
1202   PetscCheckFalse(l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1203   PetscCheckFalse(l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1204   ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr);
1205   PetscFunctionReturn(0);
1206 }
1207 
1208 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1209 {
1210   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1211   PetscErrorCode ierr;
1212 
1213   PetscFunctionBegin;
1214   ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1219 {
1220   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1221   PetscErrorCode ierr;
1222 
1223   PetscFunctionBegin;
1224   ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1229 {
1230   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1231   PetscErrorCode ierr;
1232 
1233   PetscFunctionBegin;
1234   ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1239 {
1240   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1241   PetscErrorCode ierr;
1242 
1243   PetscFunctionBegin;
1244   ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
1245   PetscFunctionReturn(0);
1246 }
1247 
1248 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1249 {
1250   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1251   PetscErrorCode ierr;
1252 
1253   PetscFunctionBegin;
1254   ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1259 {
1260   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1261   PetscErrorCode ierr;
1262 
1263   PetscFunctionBegin;
1264   ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1269 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1270 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
1271 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1272 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1273 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
1274 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);
1275 
/* Pin the matrix to the CPU (bind == PETSC_TRUE) or re-enable GPU execution
   (bind == PETSC_FALSE).  Not allowed while a column-vector or submatrix view
   is outstanding.  Besides forwarding the flag to the local block, this swaps
   the composed column-vector accessors between host and CUDA implementations. */
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscCheckFalse(d->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheckFalse(d->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* cached work objects created while bound to the CPU have host types:
       destroy them so they are recreated with CUDA types on demand */
    ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
    }
    ierr = PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = MatDestroy(&d->cmat);CHKERRQ(ierr);
    }
    /* route the column-vector accessors to the CUDA implementations */
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
  } else {
    /* route the column-vector accessors back to the host implementations */
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  }
  if (d->cmat) {
    ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1318 
/* Preallocate the device storage of a MATMPIDENSECUDA matrix; silently a no-op
   for any other matrix type.  d_data is an optional device array forwarded to
   the local block (presumably NULL lets PETSc allocate, mirroring
   MatMPIDenseSetPreallocation — confirm in MatSeqDenseCUDASetPreallocation). */
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  if (!iscuda) PetscFunctionReturn(0);
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  /* lazily create the local block: it holds the locally owned rows and ALL columns */
  if (!d->A) {
    ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
    ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
  }
  ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
  ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1341 #endif
1342 
1343 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1344 {
1345   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1346   PetscErrorCode ierr;
1347 
1348   PetscFunctionBegin;
1349   ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
1350   PetscFunctionReturn(0);
1351 }
1352 
/* A dense matrix stores every entry explicitly, so a diagonal entry is never
   structurally missing; *missing is always PETSC_FALSE and *d is left untouched. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1359 
1360 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1361 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1362 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1363 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1364 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
1365 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);
1366 
1367 /* -------------------------------------------------------------------*/
1368 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1369                                         MatGetRow_MPIDense,
1370                                         MatRestoreRow_MPIDense,
1371                                         MatMult_MPIDense,
1372                                 /*  4*/ MatMultAdd_MPIDense,
1373                                         MatMultTranspose_MPIDense,
1374                                         MatMultTransposeAdd_MPIDense,
1375                                         NULL,
1376                                         NULL,
1377                                         NULL,
1378                                 /* 10*/ NULL,
1379                                         NULL,
1380                                         NULL,
1381                                         NULL,
1382                                         MatTranspose_MPIDense,
1383                                 /* 15*/ MatGetInfo_MPIDense,
1384                                         MatEqual_MPIDense,
1385                                         MatGetDiagonal_MPIDense,
1386                                         MatDiagonalScale_MPIDense,
1387                                         MatNorm_MPIDense,
1388                                 /* 20*/ MatAssemblyBegin_MPIDense,
1389                                         MatAssemblyEnd_MPIDense,
1390                                         MatSetOption_MPIDense,
1391                                         MatZeroEntries_MPIDense,
1392                                 /* 24*/ MatZeroRows_MPIDense,
1393                                         NULL,
1394                                         NULL,
1395                                         NULL,
1396                                         NULL,
1397                                 /* 29*/ MatSetUp_MPIDense,
1398                                         NULL,
1399                                         NULL,
1400                                         MatGetDiagonalBlock_MPIDense,
1401                                         NULL,
1402                                 /* 34*/ MatDuplicate_MPIDense,
1403                                         NULL,
1404                                         NULL,
1405                                         NULL,
1406                                         NULL,
1407                                 /* 39*/ MatAXPY_MPIDense,
1408                                         MatCreateSubMatrices_MPIDense,
1409                                         NULL,
1410                                         MatGetValues_MPIDense,
1411                                         MatCopy_MPIDense,
1412                                 /* 44*/ NULL,
1413                                         MatScale_MPIDense,
1414                                         MatShift_Basic,
1415                                         NULL,
1416                                         NULL,
1417                                 /* 49*/ MatSetRandom_MPIDense,
1418                                         NULL,
1419                                         NULL,
1420                                         NULL,
1421                                         NULL,
1422                                 /* 54*/ NULL,
1423                                         NULL,
1424                                         NULL,
1425                                         NULL,
1426                                         NULL,
1427                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1428                                         MatDestroy_MPIDense,
1429                                         MatView_MPIDense,
1430                                         NULL,
1431                                         NULL,
1432                                 /* 64*/ NULL,
1433                                         NULL,
1434                                         NULL,
1435                                         NULL,
1436                                         NULL,
1437                                 /* 69*/ NULL,
1438                                         NULL,
1439                                         NULL,
1440                                         NULL,
1441                                         NULL,
1442                                 /* 74*/ NULL,
1443                                         NULL,
1444                                         NULL,
1445                                         NULL,
1446                                         NULL,
1447                                 /* 79*/ NULL,
1448                                         NULL,
1449                                         NULL,
1450                                         NULL,
1451                                 /* 83*/ MatLoad_MPIDense,
1452                                         NULL,
1453                                         NULL,
1454                                         NULL,
1455                                         NULL,
1456                                         NULL,
1457                                 /* 89*/ NULL,
1458                                         NULL,
1459                                         NULL,
1460                                         NULL,
1461                                         NULL,
1462                                 /* 94*/ NULL,
1463                                         NULL,
1464                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1465                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1466                                         NULL,
1467                                 /* 99*/ MatProductSetFromOptions_MPIDense,
1468                                         NULL,
1469                                         NULL,
1470                                         MatConjugate_MPIDense,
1471                                         NULL,
1472                                 /*104*/ NULL,
1473                                         MatRealPart_MPIDense,
1474                                         MatImaginaryPart_MPIDense,
1475                                         NULL,
1476                                         NULL,
1477                                 /*109*/ NULL,
1478                                         NULL,
1479                                         NULL,
1480                                         MatGetColumnVector_MPIDense,
1481                                         MatMissingDiagonal_MPIDense,
1482                                 /*114*/ NULL,
1483                                         NULL,
1484                                         NULL,
1485                                         NULL,
1486                                         NULL,
1487                                 /*119*/ NULL,
1488                                         NULL,
1489                                         NULL,
1490                                         NULL,
1491                                         NULL,
1492                                 /*124*/ NULL,
1493                                         MatGetColumnReductions_MPIDense,
1494                                         NULL,
1495                                         NULL,
1496                                         NULL,
1497                                 /*129*/ NULL,
1498                                         NULL,
1499                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1500                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1501                                         NULL,
1502                                 /*134*/ NULL,
1503                                         NULL,
1504                                         NULL,
1505                                         NULL,
1506                                         NULL,
1507                                 /*139*/ NULL,
1508                                         NULL,
1509                                         NULL,
1510                                         NULL,
1511                                         NULL,
1512                                         MatCreateMPIMatConcatenateSeqMat_MPIDense,
1513                                 /*145*/ NULL,
1514                                         NULL,
1515                                         NULL
1516 };
1517 
1518 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1519 {
1520   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1521   PetscBool      iscuda;
1522   PetscErrorCode ierr;
1523 
1524   PetscFunctionBegin;
1525   PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1526   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1527   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1528   if (!a->A) {
1529     ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1530     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1531     ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1532   }
1533   ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
1534   ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
1535   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1536   mat->preallocated = PETSC_TRUE;
1537   PetscFunctionReturn(0);
1538 }
1539 
1540 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1541 {
1542   Mat            B,C;
1543   PetscErrorCode ierr;
1544 
1545   PetscFunctionBegin;
1546   ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
1547   ierr = MatConvert_SeqAIJ_SeqDense(C,MATSEQDENSE,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1548   ierr = MatDestroy(&C);CHKERRQ(ierr);
1549   if (reuse == MAT_REUSE_MATRIX) {
1550     C = *newmat;
1551   } else C = NULL;
1552   ierr = MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
1553   ierr = MatDestroy(&B);CHKERRQ(ierr);
1554   if (reuse == MAT_INPLACE_MATRIX) {
1555     ierr = MatHeaderReplace(A,&C);CHKERRQ(ierr);
1556   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1557   PetscFunctionReturn(0);
1558 }
1559 
1560 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1561 {
1562   Mat            B,C;
1563   PetscErrorCode ierr;
1564 
1565   PetscFunctionBegin;
1566   ierr = MatDenseGetLocalMatrix(A,&C);CHKERRQ(ierr);
1567   ierr = MatConvert_SeqDense_SeqAIJ(C,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1568   if (reuse == MAT_REUSE_MATRIX) {
1569     C = *newmat;
1570   } else C = NULL;
1571   ierr = MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
1572   ierr = MatDestroy(&B);CHKERRQ(ierr);
1573   if (reuse == MAT_INPLACE_MATRIX) {
1574     ierr = MatHeaderReplace(A,&C);CHKERRQ(ierr);
1575   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1576   PetscFunctionReturn(0);
1577 }
1578 
1579 #if defined(PETSC_HAVE_ELEMENTAL)
/* Convert a MATMPIDENSE matrix to MATELEMENTAL by copying all locally owned
   entries into a freshly created (or reused) Elemental matrix. */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    /* Reuse the caller's matrix but wipe it, since values are added (not inserted) below */
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    /* Dense storage is column major, so feed values column-oriented */
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  /* Global indices of this rank's rows and of every column */
  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    /* Replace A's guts with the Elemental matrix, destroying the old contents */
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
1618 #endif
1619 
1620 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1621 {
1622   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1623   PetscErrorCode ierr;
1624 
1625   PetscFunctionBegin;
1626   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1627   PetscFunctionReturn(0);
1628 }
1629 
1630 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1631 {
1632   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1633   PetscErrorCode ierr;
1634 
1635   PetscFunctionBegin;
1636   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1637   PetscFunctionReturn(0);
1638 }
1639 
1640 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1641 {
1642   PetscErrorCode ierr;
1643   Mat_MPIDense   *mat;
1644   PetscInt       m,nloc,N;
1645 
1646   PetscFunctionBegin;
1647   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
1648   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
1649   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1650     PetscInt sum;
1651 
1652     if (n == PETSC_DECIDE) {
1653       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1654     }
1655     /* Check sum(n) = N */
1656     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
1657     PetscCheckFalse(sum != N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);
1658 
1659     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
1660     ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
1661   }
1662 
1663   /* numeric phase */
1664   mat = (Mat_MPIDense*)(*outmat)->data;
1665   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1666   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1667   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1668   PetscFunctionReturn(0);
1669 }
1670 
1671 #if defined(PETSC_HAVE_CUDA)
/* Convert a MATMPIDENSECUDA matrix back to plain (host) MATMPIDENSE:
   copies/duplicates values, unregisters all CUDA-specific methods, and
   converts the inner sequential matrix to MATSEQDENSE. */
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* MAT_INPLACE_MATRIX: *newmat already aliases M, modified in place below */

  B    = *newmat;
  /* Force device data back to the host before stripping the CUDA methods */
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
  /* Composing NULL removes each CUDA-only method from the object */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* Bring the inner sequential matrix back to the host type as well */
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}
1713 
/* Convert a host MATMPIDENSE matrix to MATMPIDENSECUDA: copies/duplicates
   values, registers the CUDA-specific methods, and converts the inner
   sequential matrix to MATSEQDENSECUDA. */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* MAT_INPLACE_MATRIX: *newmat already aliases M, modified in place below */

  B    = *newmat;
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  /* Register the CUDA-specific methods on the object */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B->data);
  if (m->A) {
    ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
    /* Values were just copied on the host and mirrored on the device */
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
1758 #endif
1759 
1760 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1761 {
1762   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1763   PetscErrorCode ierr;
1764   PetscInt       lda;
1765 
1766   PetscFunctionBegin;
1767   PetscCheckFalse(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1768   PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1769   if (!a->cvec) {
1770     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1771   }
1772   a->vecinuse = col + 1;
1773   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1774   ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1775   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1776   *v   = a->cvec;
1777   PetscFunctionReturn(0);
1778 }
1779 
1780 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1781 {
1782   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1783   PetscErrorCode ierr;
1784 
1785   PetscFunctionBegin;
1786   PetscCheckFalse(!a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1787   PetscCheckFalse(!a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1788   a->vecinuse = 0;
1789   ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1790   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1791   *v   = NULL;
1792   PetscFunctionReturn(0);
1793 }
1794 
1795 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1796 {
1797   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1798   PetscErrorCode ierr;
1799   PetscInt       lda;
1800 
1801   PetscFunctionBegin;
1802   PetscCheckFalse(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1803   PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1804   if (!a->cvec) {
1805     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1806   }
1807   a->vecinuse = col + 1;
1808   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1809   ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1810   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1811   ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
1812   *v   = a->cvec;
1813   PetscFunctionReturn(0);
1814 }
1815 
1816 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1817 {
1818   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1819   PetscErrorCode ierr;
1820 
1821   PetscFunctionBegin;
1822   PetscCheckFalse(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1823   PetscCheckFalse(!a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1824   a->vecinuse = 0;
1825   ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1826   ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
1827   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1828   *v   = NULL;
1829   PetscFunctionReturn(0);
1830 }
1831 
1832 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1833 {
1834   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1835   PetscErrorCode ierr;
1836   PetscInt       lda;
1837 
1838   PetscFunctionBegin;
1839   PetscCheckFalse(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1840   PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1841   if (!a->cvec) {
1842     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1843   }
1844   a->vecinuse = col + 1;
1845   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1846   ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1847   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1848   *v   = a->cvec;
1849   PetscFunctionReturn(0);
1850 }
1851 
1852 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1853 {
1854   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1855   PetscErrorCode ierr;
1856 
1857   PetscFunctionBegin;
1858   PetscCheckFalse(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1859   PetscCheckFalse(!a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1860   a->vecinuse = 0;
1861   ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1862   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1863   *v   = NULL;
1864   PetscFunctionReturn(0);
1865 }
1866 
/* Expose columns [cbegin,cend) of A as a (cached) MATMPIDENSE matrix that
   shares A's storage; must be matched by MatDenseRestoreSubMatrix(). */
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscBool      setup = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  PetscCheckFalse(a->vecinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheckFalse(a->matinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cmat) {
    /* First use: create the cached wrapper sharing A's row layout */
    setup = PETSC_TRUE;
    ierr = MatCreate(comm,&a->cmat);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr);
    ierr = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  } else if (cend-cbegin != a->cmat->cmap->N) {
    /* Cached wrapper has a different column count: rebuild its column layout */
    setup = PETSC_TRUE;
    ierr = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  }
  c = (Mat_MPIDense*)a->cmat->data;
  PetscCheckFalse(c->A,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  /* Borrow the column window of the inner sequential matrix */
  ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr);
  if (setup) { /* do we really need this? */
    ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr);
  }
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled = PETSC_TRUE;
  a->matinuse = cbegin + 1; /* stored 1-based so 0 means "not in use" */
  *v = a->cmat;
  PetscFunctionReturn(0);
}
1906 
1907 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
1908 {
1909   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1910   Mat_MPIDense   *c;
1911   PetscErrorCode ierr;
1912 
1913   PetscFunctionBegin;
1914   PetscCheckFalse(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
1915   PetscCheckFalse(!a->cmat,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
1916   PetscCheckFalse(*v != a->cmat,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
1917   a->matinuse = 0;
1918   c    = (Mat_MPIDense*)a->cmat->data;
1919   ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr);
1920   *v   = NULL;
1921   PetscFunctionReturn(0);
1922 }
1923 
1924 /*MC
1925    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1926 
1927    Options Database Keys:
1928 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1929 
1930   Level: beginner
1931 
1932 .seealso: MatCreateDense()
1933 
1934 M*/
/* Type constructor for MATMPIDENSE: allocates the per-matrix data structure,
   installs the function table, and registers all composed methods. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* Register the dense-specific methods queried via PetscObjectQueryFunction() */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",MatConvert_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#endif

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2000 
2001 /*MC
2002    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
2003 
2004    Options Database Keys:
2005 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
2006 
2007   Level: beginner
2008 
.seealso: MATMPIDENSE, MATSEQDENSECUDA, MATDENSECUDA, MatCreateDense()
2010 
2011 M*/
2012 #if defined(PETSC_HAVE_CUDA)
2013 #include <petsc/private/deviceimpl.h>
/* Type constructor for MATMPIDENSECUDA: builds a host MPIDENSE matrix and
   converts it in place to the CUDA type. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Make sure a CUDA device is initialized before any device allocation */
  ierr = PetscDeviceInitialize(PETSC_DEVICE_CUDA);CHKERRQ(ierr);
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2024 #endif
2025 
2026 /*MC
2027    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
2028 
2029    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
2030    and MATMPIDENSE otherwise.
2031 
2032    Options Database Keys:
2033 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
2034 
2035   Level: beginner
2036 
2037 .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
2038 M*/
2039 
2040 /*MC
2041    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
2042 
2043    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
2044    and MATMPIDENSECUDA otherwise.
2045 
2046    Options Database Keys:
2047 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
2048 
2049   Level: beginner
2050 
2051 .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
2052 M*/
2053 
2054 /*@C
2055    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
2056 
2057    Collective
2058 
2059    Input Parameters:
2060 +  B - the matrix
2061 -  data - optional location of matrix data.  Set data=NULL for PETSc
2062    to control all matrix memory allocation.
2063 
2064    Notes:
2065    The dense format is fully compatible with standard Fortran 77
2066    storage by columns.
2067 
2068    The data input variable is intended primarily for Fortran programmers
2069    who wish to allocate their own matrix memory space.  Most users should
2070    set data=NULL.
2071 
2072    Level: intermediate
2073 
2074 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
2075 @*/
2076 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
2077 {
2078   PetscErrorCode ierr;
2079 
2080   PetscFunctionBegin;
2081   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2082   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
2083   PetscFunctionReturn(0);
2084 }
2085 
2086 /*@
2087    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
2088    array provided by the user. This is useful to avoid copying an array
2089    into a matrix
2090 
2091    Not Collective
2092 
2093    Input Parameters:
2094 +  mat - the matrix
2095 -  array - the array in column major order
2096 
2097    Notes:
2098    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
2099    freed when the matrix is destroyed.
2100 
2101    Level: developer
2102 
2103 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
2104 
2105 @*/
2106 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
2107 {
2108   PetscErrorCode ierr;
2109 
2110   PetscFunctionBegin;
2111   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2112   ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2113   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2114 #if defined(PETSC_HAVE_CUDA)
2115   mat->offloadmask = PETSC_OFFLOAD_CPU;
2116 #endif
2117   PetscFunctionReturn(0);
2118 }
2119 
2120 /*@
2121    MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()
2122 
2123    Not Collective
2124 
2125    Input Parameters:
2126 .  mat - the matrix
2127 
2128    Notes:
2129    You can only call this after a call to MatDensePlaceArray()
2130 
2131    Level: developer
2132 
2133 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
2134 
2135 @*/
2136 PetscErrorCode  MatDenseResetArray(Mat mat)
2137 {
2138   PetscErrorCode ierr;
2139 
2140   PetscFunctionBegin;
2141   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2142   ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
2143   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2144   PetscFunctionReturn(0);
2145 }
2146 
2147 /*@
2148    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2149    array provided by the user. This is useful to avoid copying an array
2150    into a matrix
2151 
2152    Not Collective
2153 
2154    Input Parameters:
2155 +  mat - the matrix
2156 -  array - the array in column major order
2157 
2158    Notes:
2159    The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
2160    freed by the user. It will be freed when the matrix is destroyed.
2161 
2162    Level: developer
2163 
2164 .seealso: MatDenseGetArray(), VecReplaceArray()
2165 @*/
2166 PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
2167 {
2168   PetscErrorCode ierr;
2169 
2170   PetscFunctionBegin;
2171   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2172   ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2173   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2174 #if defined(PETSC_HAVE_CUDA)
2175   mat->offloadmask = PETSC_OFFLOAD_CPU;
2176 #endif
2177   PetscFunctionReturn(0);
2178 }
2179 
2180 #if defined(PETSC_HAVE_CUDA)
2181 /*@C
2182    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
2183    array provided by the user. This is useful to avoid copying an array
2184    into a matrix
2185 
2186    Not Collective
2187 
2188    Input Parameters:
2189 +  mat - the matrix
2190 -  array - the array in column major order
2191 
2192    Notes:
2193    You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
2194    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
2195 
2196    Level: developer
2197 
2198 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
2199 @*/
2200 PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
2201 {
2202   PetscErrorCode ierr;
2203 
2204   PetscFunctionBegin;
2205   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2206   ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2207   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2208   mat->offloadmask = PETSC_OFFLOAD_GPU;
2209   PetscFunctionReturn(0);
2210 }
2211 
2212 /*@C
2213    MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray()
2214 
2215    Not Collective
2216 
2217    Input Parameters:
2218 .  mat - the matrix
2219 
2220    Notes:
2221    You can only call this after a call to MatDenseCUDAPlaceArray()
2222 
2223    Level: developer
2224 
2225 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()
2226 
2227 @*/
2228 PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
2229 {
2230   PetscErrorCode ierr;
2231 
2232   PetscFunctionBegin;
2233   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2234   ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
2235   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2236   PetscFunctionReturn(0);
2237 }
2238 
2239 /*@C
2240    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
2241    array provided by the user. This is useful to avoid copying an array
2242    into a matrix
2243 
2244    Not Collective
2245 
2246    Input Parameters:
2247 +  mat - the matrix
2248 -  array - the array in column major order
2249 
2250    Notes:
2251    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2252    The memory passed in CANNOT be freed by the user. It will be freed
2253    when the matrix is destroyed. The array should respect the matrix leading dimension.
2254 
2255    Level: developer
2256 
2257 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
2258 @*/
2259 PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
2260 {
2261   PetscErrorCode ierr;
2262 
2263   PetscFunctionBegin;
2264   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2265   ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2266   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2267   mat->offloadmask = PETSC_OFFLOAD_GPU;
2268   PetscFunctionReturn(0);
2269 }
2270 
2271 /*@C
2272    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.
2273 
2274    Not Collective
2275 
2276    Input Parameters:
2277 .  A - the matrix
2278 
2279    Output Parameter:
2280 .  array - the GPU array in column major order
2281 
2282    Notes:
2283    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
2284 
2285    Level: developer
2286 
2287 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
2288 @*/
2289 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
2290 {
2291   PetscErrorCode ierr;
2292 
2293   PetscFunctionBegin;
2294   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2295   ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2296   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2297   PetscFunctionReturn(0);
2298 }
2299 
2300 /*@C
2301    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().
2302 
2303    Not Collective
2304 
2305    Input Parameters:
2306 +  A - the matrix
2307 -  array - the GPU array in column major order
2308 
2309    Notes:
2310 
2311    Level: developer
2312 
2313 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2314 @*/
2315 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
2316 {
2317   PetscErrorCode ierr;
2318 
2319   PetscFunctionBegin;
2320   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2321   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2322   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2323   A->offloadmask = PETSC_OFFLOAD_GPU;
2324   PetscFunctionReturn(0);
2325 }
2326 
2327 /*@C
2328    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.
2329 
2330    Not Collective
2331 
2332    Input Parameters:
2333 .  A - the matrix
2334 
2335    Output Parameter:
2336 .  array - the GPU array in column major order
2337 
2338    Notes:
2339    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2340 
2341    Level: developer
2342 
2343 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2344 @*/
2345 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
2346 {
2347   PetscErrorCode ierr;
2348 
2349   PetscFunctionBegin;
2350   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2351   ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2352   PetscFunctionReturn(0);
2353 }
2354 
2355 /*@C
2356    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2357 
2358    Not Collective
2359 
2360    Input Parameters:
2361 +  A - the matrix
2362 -  array - the GPU array in column major order
2363 
2364    Notes:
2365    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2366 
2367    Level: developer
2368 
2369 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
2370 @*/
2371 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2372 {
2373   PetscErrorCode ierr;
2374 
2375   PetscFunctionBegin;
2376   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2377   PetscFunctionReturn(0);
2378 }
2379 
2380 /*@C
2381    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.
2382 
2383    Not Collective
2384 
2385    Input Parameters:
2386 .  A - the matrix
2387 
2388    Output Parameter:
2389 .  array - the GPU array in column major order
2390 
2391    Notes:
2392    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().
2393 
2394    Level: developer
2395 
2396 .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2397 @*/
2398 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
2399 {
2400   PetscErrorCode ierr;
2401 
2402   PetscFunctionBegin;
2403   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2404   ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2405   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2406   PetscFunctionReturn(0);
2407 }
2408 
2409 /*@C
2410    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().
2411 
2412    Not Collective
2413 
2414    Input Parameters:
2415 +  A - the matrix
2416 -  array - the GPU array in column major order
2417 
2418    Notes:
2419 
2420    Level: developer
2421 
2422 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2423 @*/
2424 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
2425 {
2426   PetscErrorCode ierr;
2427 
2428   PetscFunctionBegin;
2429   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2430   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2431   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2432   A->offloadmask = PETSC_OFFLOAD_GPU;
2433   PetscFunctionReturn(0);
2434 }
2435 #endif
2436 
2437 /*@C
2438    MatCreateDense - Creates a matrix in dense format.
2439 
2440    Collective
2441 
2442    Input Parameters:
2443 +  comm - MPI communicator
2444 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2445 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2446 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2447 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2448 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2449    to control all matrix memory allocation.
2450 
2451    Output Parameter:
2452 .  A - the matrix
2453 
2454    Notes:
2455    The dense format is fully compatible with standard Fortran 77
2456    storage by columns.
2457    Note that, although local portions of the matrix are stored in column-major
2458    order, the matrix is partitioned across MPI ranks by row.
2459 
2460    The data input variable is intended primarily for Fortran programmers
2461    who wish to allocate their own matrix memory space.  Most users should
2462    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
2463 
2464    The user MUST specify either the local or global matrix dimensions
2465    (possibly both).
2466 
2467    Level: intermediate
2468 
2469 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
2470 @*/
2471 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2472 {
2473   PetscErrorCode ierr;
2474   PetscMPIInt    size;
2475 
2476   PetscFunctionBegin;
2477   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2478   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2479   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
2480   if (size > 1) {
2481     PetscBool havedata = (PetscBool)!!data;
2482 
2483     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
2484     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
2485     ierr = MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm);CHKERRMPI(ierr);
2486     if (havedata) {  /* user provided data array, so no need to assemble */
2487       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
2488       (*A)->assembled = PETSC_TRUE;
2489     }
2490   } else {
2491     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
2492     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
2493   }
2494   PetscFunctionReturn(0);
2495 }
2496 
2497 #if defined(PETSC_HAVE_CUDA)
2498 /*@C
2499    MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.
2500 
2501    Collective
2502 
2503    Input Parameters:
2504 +  comm - MPI communicator
2505 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2506 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2507 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2508 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2509 -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
2510    to control matrix memory allocation.
2511 
2512    Output Parameter:
2513 .  A - the matrix
2514 
2515    Notes:
2516 
2517    Level: intermediate
2518 
2519 .seealso: MatCreate(), MatCreateDense()
2520 @*/
/* Create a (possibly distributed) dense CUDA matrix; `data`, if given, is a
   device pointer used as the matrix storage. */
PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  /* debug-only check that all ranks agree on whether data was supplied;
     NOTE(review): MatCreateDense() instead allreduces this flag and tolerates
     ranks passing NULL -- confirm the stricter collective contract is intended here */
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
    ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    /* single-rank communicator: a sequential dense CUDA matrix suffices */
    ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
    ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2544 #endif
2545 
2546 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
2547 {
2548   Mat            mat;
2549   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
2550   PetscErrorCode ierr;
2551 
2552   PetscFunctionBegin;
2553   *newmat = NULL;
2554   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
2555   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2556   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
2557   a       = (Mat_MPIDense*)mat->data;
2558 
2559   mat->factortype   = A->factortype;
2560   mat->assembled    = PETSC_TRUE;
2561   mat->preallocated = PETSC_TRUE;
2562 
2563   mat->insertmode = NOT_SET_VALUES;
2564   a->donotstash   = oldmat->donotstash;
2565 
2566   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
2567   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
2568 
2569   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2570   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2571   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
2572 
2573   *newmat = mat;
2574   PetscFunctionReturn(0);
2575 }
2576 
2577 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
2578 {
2579   PetscErrorCode ierr;
2580   PetscBool      isbinary;
2581 #if defined(PETSC_HAVE_HDF5)
2582   PetscBool      ishdf5;
2583 #endif
2584 
2585   PetscFunctionBegin;
2586   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2587   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2588   /* force binary viewer to load .info file if it has not yet done so */
2589   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2590   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2591 #if defined(PETSC_HAVE_HDF5)
2592   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2593 #endif
2594   if (isbinary) {
2595     ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
2596 #if defined(PETSC_HAVE_HDF5)
2597   } else if (ishdf5) {
2598     ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
2599 #endif
2600   } else SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2601   PetscFunctionReturn(0);
2602 }
2603 
2604 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
2605 {
2606   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2607   Mat            a,b;
2608   PetscBool      flg;
2609   PetscErrorCode ierr;
2610 
2611   PetscFunctionBegin;
2612   a    = matA->A;
2613   b    = matB->A;
2614   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2615   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2616   PetscFunctionReturn(0);
2617 }
2618 
2619 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2620 {
2621   PetscErrorCode        ierr;
2622   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2623 
2624   PetscFunctionBegin;
2625   ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
2626   ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
2627   ierr = PetscFree(atb);CHKERRQ(ierr);
2628   PetscFunctionReturn(0);
2629 }
2630 
2631 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2632 {
2633   PetscErrorCode        ierr;
2634   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2635 
2636   PetscFunctionBegin;
2637   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
2638   ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
2639   ierr = PetscFree(abt);CHKERRQ(ierr);
2640   PetscFunctionReturn(0);
2641 }
2642 
/* Numeric phase of C = A^T * B for MPIDENSE matrices: each rank forms the full
   local product aseq^T * bseq and the row blocks of C are summed across ranks
   with MPI_Reduce_scatter. The context (buffers, counts) was allocated by
   MatTransposeMatMultSymbolic_MPIDense_MPIDense(). */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts;
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,proc,k,j,lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheckFalse(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  atb = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf = atb->sendbuf;

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);

  /* arrange atbarray into sendbuf: the contribution destined for rank `proc`
     (its row block of C, column major) is packed contiguously, in the order
     MPI_Reduce_scatter expects */
  ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(atb->atb,&lda);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*lda];
    }
  }
  ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);

  /* sum all atbarray to local values of C; recvcounts[] (set in the symbolic
     phase) gives each rank (rows owned)*cN entries of the reduction */
  ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRMPI(ierr);
  /* NOTE(review): the received block is stored assuming c->A has lda == local
     rows of C -- presumably guaranteed by the symbolic setup; confirm */
  ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2688 
2689 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2690 {
2691   PetscErrorCode        ierr;
2692   MPI_Comm              comm;
2693   PetscMPIInt           size;
2694   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
2695   Mat_TransMatMultDense *atb;
2696   PetscBool             cisdense;
2697   PetscInt              i;
2698   const PetscInt        *ranges;
2699 
2700   PetscFunctionBegin;
2701   MatCheckProduct(C,3);
2702   PetscCheckFalse(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2703   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2704   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2705     SETERRQ(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2706   }
2707 
2708   /* create matrix product C */
2709   ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr);
2710   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
2711   if (!cisdense) {
2712     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
2713   }
2714   ierr = MatSetUp(C);CHKERRQ(ierr);
2715 
2716   /* create data structure for reuse C */
2717   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
2718   ierr = PetscNew(&atb);CHKERRQ(ierr);
2719   cM   = C->rmap->N;
2720   ierr = PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);
2721   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
2722   for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
2723 
2724   C->product->data    = atb;
2725   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2726   PetscFunctionReturn(0);
2727 }
2728 
/* Symbolic phase of C = A * B^T for MPIDENSE matrices. Shapes C, selects the
   communication algorithm ("allgatherv" gathers all of B everywhere; anything
   else uses a cyclic pairwise exchange), and allocates the exchange buffers.
   `fill` is accepted for interface uniformity but unused for dense products. */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  PetscCheckFalse(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A and B */
  PetscCheckFalse(A->cmap->n != B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")",A->cmap->n,B->cmap->n);

  /* alg 0 = "allgatherv", alg 1 = cyclic pairwise exchange */
  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  alg  = flg ? 0 : 1;

  /* setup matrix product C */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  /* private tag so the cyclic sends/recvs cannot collide with other traffic on the communicator */
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" -- double buffers sized for the largest row block of B that can circulate */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" -- buf[0] holds the packed local B, buf[1] the fully gathered B
              (A->cmap->N == B->cmap->N was established by the size check above) */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data    = abt;
  C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2781 
/* Numeric phase of C = A * B^T using the cyclic algorithm: the packed local
   rows of B circulate around the ring of ranks; at each step every rank
   multiplies its local A block against the currently held B block with a
   single GEMM while the next block is in flight (communication/computation
   overlap via MPI_Isend/MPI_Irecv). */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=NULL, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda = 0, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheckFalse(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  = (Mat_MatTransMultDense*)C->product->data;
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
  /* leading dimensions of the three local blocks, as BLAS integers */
  ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  /* use bv directly when the local B block is already densely packed (lda == rows);
     otherwise compact it column-by-column into buf[0] */
  if (blda == bn) {
    sendbuf = (PetscScalar*)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  /* ring neighbors: send "down", receive "up" */
  if (size > 1) {
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;  /* at step 0 we hold our own block */
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; /* alternate buffers so send and recv never overlap */
      ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRMPI(ierr);
      ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRMPI(ierr);
    }

    /* local aseq * sendbuf^T, written into the column block of C owned by recvisfrom;
       sendbuf is packed with cn rows per column, hence ldb == cn */
    ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2867 
2868 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2869 {
2870   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2871   Mat_MatTransMultDense *abt;
2872   PetscErrorCode        ierr;
2873   MPI_Comm              comm;
2874   PetscMPIInt           size;
2875   PetscScalar           *cv, *sendbuf, *recvbuf;
2876   const PetscScalar     *av,*bv;
2877   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2878   PetscScalar           _DOne=1.0,_DZero=0.0;
2879   PetscBLASInt          cm, cn, ck, alda, clda;
2880 
2881   PetscFunctionBegin;
2882   MatCheckProduct(C,3);
2883   PetscCheckFalse(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2884   abt  = (Mat_MatTransMultDense*)C->product->data;
2885   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2886   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
2887   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
2888   ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
2889   ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
2890   ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
2891   ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
2892   ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr);
2893   ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
2894   ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
2895   /* copy transpose of B into buf[0] */
2896   bn      = B->rmap->n;
2897   sendbuf = abt->buf[0];
2898   recvbuf = abt->buf[1];
2899   for (k = 0, j = 0; j < bn; j++) {
2900     for (i = 0; i < cK; i++, k++) {
2901       sendbuf[k] = bv[i * blda + j];
2902     }
2903   }
2904   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2905   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRMPI(ierr);
2906   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2907   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2908   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
2909   if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2910   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
2911   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2912   ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
2913   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2914   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2915   PetscFunctionReturn(0);
2916 }
2917 
2918 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2919 {
2920   Mat_MatTransMultDense *abt;
2921   PetscErrorCode        ierr;
2922 
2923   PetscFunctionBegin;
2924   MatCheckProduct(C,3);
2925   PetscCheckFalse(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2926   abt = (Mat_MatTransMultDense*)C->product->data;
2927   switch (abt->alg) {
2928   case 1:
2929     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2930     break;
2931   default:
2932     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2933     break;
2934   }
2935   PetscFunctionReturn(0);
2936 }
2937 
2938 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2939 {
2940   PetscErrorCode   ierr;
2941   Mat_MatMultDense *ab = (Mat_MatMultDense*)data;
2942 
2943   PetscFunctionBegin;
2944   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2945   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2946   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2947   ierr = PetscFree(ab);CHKERRQ(ierr);
2948   PetscFunctionReturn(0);
2949 }
2950 
2951 #if defined(PETSC_HAVE_ELEMENTAL)
2952 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2953 {
2954   PetscErrorCode   ierr;
2955   Mat_MatMultDense *ab;
2956 
2957   PetscFunctionBegin;
2958   MatCheckProduct(C,3);
2959   PetscCheckFalse(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
2960   ab   = (Mat_MatMultDense*)C->product->data;
2961   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
2962   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
2963   ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
2964   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
2965   PetscFunctionReturn(0);
2966 }
2967 
2968 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2969 {
2970   PetscErrorCode   ierr;
2971   Mat              Ae,Be,Ce;
2972   Mat_MatMultDense *ab;
2973 
2974   PetscFunctionBegin;
2975   MatCheckProduct(C,4);
2976   PetscCheckFalse(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2977   /* check local size of A and B */
2978   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2979     SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2980   }
2981 
2982   /* create elemental matrices Ae and Be */
2983   ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr);
2984   ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2985   ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr);
2986   ierr = MatSetUp(Ae);CHKERRQ(ierr);
2987   ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2988 
2989   ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr);
2990   ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
2991   ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
2992   ierr = MatSetUp(Be);CHKERRQ(ierr);
2993   ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2994 
2995   /* compute symbolic Ce = Ae*Be */
2996   ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr);
2997   ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr);
2998 
2999   /* setup C */
3000   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
3001   ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr);
3002   ierr = MatSetUp(C);CHKERRQ(ierr);
3003 
3004   /* create data structure for reuse Cdense */
3005   ierr = PetscNew(&ab);CHKERRQ(ierr);
3006   ab->Ae = Ae;
3007   ab->Be = Be;
3008   ab->Ce = Ce;
3009 
3010   C->product->data    = ab;
3011   C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
3012   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
3013   PetscFunctionReturn(0);
3014 }
3015 #endif
3016 /* ----------------------------------------------- */
3017 #if defined(PETSC_HAVE_ELEMENTAL)
3018 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
3019 {
3020   PetscFunctionBegin;
3021   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
3022   C->ops->productsymbolic = MatProductSymbolic_AB;
3023   PetscFunctionReturn(0);
3024 }
3025 #endif
3026 
3027 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
3028 {
3029   Mat_Product *product = C->product;
3030   Mat         A = product->A,B=product->B;
3031 
3032   PetscFunctionBegin;
3033   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
3034     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
3035   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
3036   C->ops->productsymbolic = MatProductSymbolic_AtB;
3037   PetscFunctionReturn(0);
3038 }
3039 
3040 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
3041 {
3042   PetscErrorCode ierr;
3043   Mat_Product    *product = C->product;
3044   const char     *algTypes[2] = {"allgatherv","cyclic"};
3045   PetscInt       alg,nalg = 2;
3046   PetscBool      flg = PETSC_FALSE;
3047 
3048   PetscFunctionBegin;
3049   /* Set default algorithm */
3050   alg = 0; /* default is allgatherv */
3051   ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
3052   if (flg) {
3053     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
3054   }
3055 
3056   /* Get runtime option */
3057   if (product->api_user) {
3058     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
3059     ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
3060     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3061   } else {
3062     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
3063     ierr = PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
3064     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3065   }
3066   if (flg) {
3067     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
3068   }
3069 
3070   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
3071   C->ops->productsymbolic          = MatProductSymbolic_ABt;
3072   PetscFunctionReturn(0);
3073 }
3074 
3075 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
3076 {
3077   PetscErrorCode ierr;
3078   Mat_Product    *product = C->product;
3079 
3080   PetscFunctionBegin;
3081   switch (product->type) {
3082 #if defined(PETSC_HAVE_ELEMENTAL)
3083   case MATPRODUCT_AB:
3084     ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
3085     break;
3086 #endif
3087   case MATPRODUCT_AtB:
3088     ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
3089     break;
3090   case MATPRODUCT_ABt:
3091     ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
3092     break;
3093   default:
3094     break;
3095   }
3096   PetscFunctionReturn(0);
3097 }
3098