
/*
   Basic functions for parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
      MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential
              matrix that represents the local portion of the operator; for a
              MATSEQDENSE matrix, returns the matrix itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

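    Notes:
    A minimal usage sketch (assuming A is an already assembled Seq or MPI dense matrix):
.vb
      Mat Aloc;
      ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
      /* for MATMPIDENSE, Aloc references the per-process MATSEQDENSE storage of A */
.ve
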
    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidPointer(B,2);
  ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else {
    ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
{
  Mat_MPIDense   *Amat = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *Bmat = (Mat_MPIDense*)B->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCopy(Amat->A,Bmat->A,s);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

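/* Return the on-process diagonal block as a sequential dense matrix that aliases the
   local array; the block is created once and cached on the matrix with
   PetscObjectCompose() so subsequent calls simply return it */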
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr);
    if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}

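/* Locally owned rows are inserted directly into the sequential block; rows owned by
   other processes are buffered in the matrix stash and communicated during
   MatAssemblyBegin()/MatAssemblyEnd() (unless MAT_IGNORE_OFF_PROC_ENTRIES is set) */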
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->A) {
    if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
    ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->A);CHKERRQ(ierr);
    ierr = MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
    ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  }
  ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
  PetscErrorCode    ierr;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. Currently supports only submatrices with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr);
  for (i=0; i<Ncols; i++) {
    const PetscScalar *av = v + lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }
  ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr);

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

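/* Drain the stash: receive the off-process values, insert them one run of consecutive
   rows at a time, then assemble the local block; the first final assembly also sets up
   the communication needed by the multiply routines */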
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

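/* Zero the locally owned subset of the given global rows; optionally fix the right-hand
   side as b = diag*x on those rows, and reinsert diag on the zeroed diagonal entries */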
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr);
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr);
  }
  ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (diag != 0.0) {
    Vec d;

    ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
    ierr = VecSet(d,diag);CHKERRQ(ierr);
    ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecDestroy(&d);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

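/* y = A*x: broadcast the needed entries of the distributed input vector into the local
   work vector lvec with the PetscSF, then apply the sequential dense block */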
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  ierr = VecGetArrayReadAndMemType(xx,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  ierr = VecGetArrayReadAndMemType(xx,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

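/* y = A^T*x (and the Add variant below): apply the local transpose into lvec, then
   sum the partial results into the distributed output vector with a PetscSF reduction */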
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  ierr = VecSet(yy,0.0);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(yy,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(yy,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(zz,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(zz,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          lda,len,i,n,m = A->rmap->n,radd;
  PetscScalar       *x,zero = 0.0;
  const PetscScalar *av;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  for (i=0; i<len; i++) {
    x[i] = av[radd + i*lda + i];
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (mdn->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (mdn->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
#if defined (PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);

#include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (rank == 0) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (rank == 0) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

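/* A = diag(ll)*A*diag(rr): the left scaling is purely local (each process scales its
   own rows); the right scaling first broadcasts rr into lvec so every process sees the
   scaling factors for all the columns it stores */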
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  const PetscScalar *l;
  PetscScalar       x,*v,*vv,*r;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = vv + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
  }
  if (rr) {
    const PetscScalar *ar;

    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector non-conforming local size, %D != %D", s3a, s3);
    ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = vv + i*lda;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

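/* Frobenius norm: sum |a_ij|^2 locally and reduce; 1-norm: reduce the per-column
   absolute sums and take the maximum; infinity norm: reduce the local max row sums.
   The 2-norm is not supported in parallel */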
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,j;
  PetscMPIInt       size;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr);
  v    = av;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  if (size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = av;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

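/* Form the transpose by inserting each local column as a row of the result with
   MatSetValues(); the off-process entries travel in the final assembly */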
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i,lda;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else B = *matout;

  m    = a->A->rmap->n; n = a->A->cmap->n;
  ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += lda;
  }
  ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!A->preallocated) {
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
  if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
  ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat,PetscInt,PetscReal*);

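/* Compute the per-column reductions locally, then combine across processes: MPI_MAX
   for the infinity norm, MPI_SUM otherwise; 2-norms are squared before the reduction
   (and square-rooted after), and mean reductions are divided by the global row count */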
PetscErrorCode MatGetColumnReductions_MPIDense(Mat A,PetscInt type,PetscReal *reductions)
{
  PetscErrorCode ierr;
  PetscInt       i,m,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  if (type == REDUCTION_MEAN_REALPART) {
    ierr = MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_REALPART,work);CHKERRQ(ierr);
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    ierr = MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_IMAGINARYPART,work);CHKERRQ(ierr);
  } else {
    ierr = MatGetColumnReductions_SeqDense(a->A,type,work);CHKERRQ(ierr);
  }
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRMPI(ierr);
  } else {
    ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRMPI(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
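/* Column access on the GPU: place the address of the requested column of the device
   array into a cached MPI CUDA vector and mark the matrix in use until it is restored */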
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);

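/* Rebind the column-vector accessors between the CUDA and host implementations, and
   drop any cached column vector/submatrix of the wrong type when moving off the CPU */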
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (d->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (d->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
    }
    ierr = PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = MatDestroy(&d->cmat);CHKERRQ(ierr);
    }
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
  } else {
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  }
  if (d->cmat) {
    ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  if (!iscuda) PetscFunctionReturn(0);
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!d->A) {
    ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
    ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
  }
  ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
  ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#endif

static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
1371                                         MatMult_MPIDense,
1372                                 /*  4*/ MatMultAdd_MPIDense,
1373                                         MatMultTranspose_MPIDense,
1374                                         MatMultTransposeAdd_MPIDense,
1375                                         NULL,
1376                                         NULL,
1377                                         NULL,
1378                                 /* 10*/ NULL,
1379                                         NULL,
1380                                         NULL,
1381                                         NULL,
1382                                         MatTranspose_MPIDense,
1383                                 /* 15*/ MatGetInfo_MPIDense,
1384                                         MatEqual_MPIDense,
1385                                         MatGetDiagonal_MPIDense,
1386                                         MatDiagonalScale_MPIDense,
1387                                         MatNorm_MPIDense,
1388                                 /* 20*/ MatAssemblyBegin_MPIDense,
1389                                         MatAssemblyEnd_MPIDense,
1390                                         MatSetOption_MPIDense,
1391                                         MatZeroEntries_MPIDense,
1392                                 /* 24*/ MatZeroRows_MPIDense,
1393                                         NULL,
1394                                         NULL,
1395                                         NULL,
1396                                         NULL,
1397                                 /* 29*/ MatSetUp_MPIDense,
1398                                         NULL,
1399                                         NULL,
1400                                         MatGetDiagonalBlock_MPIDense,
1401                                         NULL,
1402                                 /* 34*/ MatDuplicate_MPIDense,
1403                                         NULL,
1404                                         NULL,
1405                                         NULL,
1406                                         NULL,
1407                                 /* 39*/ MatAXPY_MPIDense,
1408                                         MatCreateSubMatrices_MPIDense,
1409                                         NULL,
1410                                         MatGetValues_MPIDense,
1411                                         MatCopy_MPIDense,
1412                                 /* 44*/ NULL,
1413                                         MatScale_MPIDense,
1414                                         MatShift_Basic,
1415                                         NULL,
1416                                         NULL,
1417                                 /* 49*/ MatSetRandom_MPIDense,
1418                                         NULL,
1419                                         NULL,
1420                                         NULL,
1421                                         NULL,
1422                                 /* 54*/ NULL,
1423                                         NULL,
1424                                         NULL,
1425                                         NULL,
1426                                         NULL,
1427                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1428                                         MatDestroy_MPIDense,
1429                                         MatView_MPIDense,
1430                                         NULL,
1431                                         NULL,
1432                                 /* 64*/ NULL,
1433                                         NULL,
1434                                         NULL,
1435                                         NULL,
1436                                         NULL,
1437                                 /* 69*/ NULL,
1438                                         NULL,
1439                                         NULL,
1440                                         NULL,
1441                                         NULL,
1442                                 /* 74*/ NULL,
1443                                         NULL,
1444                                         NULL,
1445                                         NULL,
1446                                         NULL,
1447                                 /* 79*/ NULL,
1448                                         NULL,
1449                                         NULL,
1450                                         NULL,
1451                                 /* 83*/ MatLoad_MPIDense,
1452                                         NULL,
1453                                         NULL,
1454                                         NULL,
1455                                         NULL,
1456                                         NULL,
1457                                 /* 89*/ NULL,
1458                                         NULL,
1459                                         NULL,
1460                                         NULL,
1461                                         NULL,
1462                                 /* 94*/ NULL,
1463                                         NULL,
1464                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1465                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1466                                         NULL,
1467                                 /* 99*/ MatProductSetFromOptions_MPIDense,
1468                                         NULL,
1469                                         NULL,
1470                                         MatConjugate_MPIDense,
1471                                         NULL,
1472                                 /*104*/ NULL,
1473                                         MatRealPart_MPIDense,
1474                                         MatImaginaryPart_MPIDense,
1475                                         NULL,
1476                                         NULL,
1477                                 /*109*/ NULL,
1478                                         NULL,
1479                                         NULL,
1480                                         MatGetColumnVector_MPIDense,
1481                                         MatMissingDiagonal_MPIDense,
1482                                 /*114*/ NULL,
1483                                         NULL,
1484                                         NULL,
1485                                         NULL,
1486                                         NULL,
1487                                 /*119*/ NULL,
1488                                         NULL,
1489                                         NULL,
1490                                         NULL,
1491                                         NULL,
1492                                 /*124*/ NULL,
1493                                         MatGetColumnReductions_MPIDense,
1494                                         NULL,
1495                                         NULL,
1496                                         NULL,
1497                                 /*129*/ NULL,
1498                                         NULL,
1499                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1500                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1501                                         NULL,
1502                                 /*134*/ NULL,
1503                                         NULL,
1504                                         NULL,
1505                                         NULL,
1506                                         NULL,
1507                                 /*139*/ NULL,
1508                                         NULL,
1509                                         NULL,
1510                                         NULL,
1511                                         NULL,
1512                                         MatCreateMPIMatConcatenateSeqMat_MPIDense,
1513                                 /*145*/ NULL,
1514                                         NULL,
1515                                         NULL
1516 };
1517 
1518 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1519 {
1520   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1521   PetscBool      iscuda;
1522   PetscErrorCode ierr;
1523 
1524   PetscFunctionBegin;
1525   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1526   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1527   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1528   if (!a->A) {
1529     ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1530     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1531     ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1532   }
1533   ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
1534   ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
1535   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1536   mat->preallocated = PETSC_TRUE;
1537   PetscFunctionReturn(0);
1538 }
1539 
1540 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1541 {
1542   Mat            B,C;
1543   PetscErrorCode ierr;
1544 
1545   PetscFunctionBegin;
1546   ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
1547   ierr = MatConvert_SeqAIJ_SeqDense(C,MATSEQDENSE,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1548   ierr = MatDestroy(&C);CHKERRQ(ierr);
1549   if (reuse == MAT_REUSE_MATRIX) {
1550     C = *newmat;
1551   } else C = NULL;
1552   ierr = MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
1553   ierr = MatDestroy(&B);CHKERRQ(ierr);
1554   if (reuse == MAT_INPLACE_MATRIX) {
1555     ierr = MatHeaderReplace(A,&C);CHKERRQ(ierr);
1556   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1557   PetscFunctionReturn(0);
1558 }
1559 
1560 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1561 {
1562   Mat            B,C;
1563   PetscErrorCode ierr;
1564 
1565   PetscFunctionBegin;
1566   ierr = MatDenseGetLocalMatrix(A,&C);CHKERRQ(ierr);
1567   ierr = MatConvert_SeqDense_SeqAIJ(C,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1568   if (reuse == MAT_REUSE_MATRIX) {
1569     C = *newmat;
1570   } else C = NULL;
1571   ierr = MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
1572   ierr = MatDestroy(&B);CHKERRQ(ierr);
1573   if (reuse == MAT_INPLACE_MATRIX) {
1574     ierr = MatHeaderReplace(A,&C);CHKERRQ(ierr);
1575   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1576   PetscFunctionReturn(0);
1577 }
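
/* The conversion routines above are normally reached through the public MatConvert()
   interface; a minimal sketch (illustrative; A is an existing MATMPIDENSE matrix):

     Mat B;
     ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
*/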
1578 
1579 #if defined(PETSC_HAVE_ELEMENTAL)
1580 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1581 {
1582   Mat            mat_elemental;
1583   PetscErrorCode ierr;
1584   PetscScalar    *v;
1585   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1586 
1587   PetscFunctionBegin;
1588   if (reuse == MAT_REUSE_MATRIX) {
1589     mat_elemental = *newmat;
1590     ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
1591   } else {
1592     ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
1593     ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1594     ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
1595     ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
1596     ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1597   }
1598 
1599   ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
1600   for (i=0; i<N; i++) cols[i] = i;
1601   for (i=0; i<m; i++) rows[i] = rstart + i;
1602 
1603   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1604   ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
1605   ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
1606   ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1607   ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1608   ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
1609   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1610 
1611   if (reuse == MAT_INPLACE_MATRIX) {
1612     ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
1613   } else {
1614     *newmat = mat_elemental;
1615   }
1616   PetscFunctionReturn(0);
1617 }
1618 #endif
1619 
1620 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1621 {
1622   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1623   PetscErrorCode ierr;
1624 
1625   PetscFunctionBegin;
1626   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1627   PetscFunctionReturn(0);
1628 }
1629 
1630 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1631 {
1632   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1633   PetscErrorCode ierr;
1634 
1635   PetscFunctionBegin;
1636   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1637   PetscFunctionReturn(0);
1638 }
1639 
1640 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1641 {
1642   PetscErrorCode ierr;
1643   Mat_MPIDense   *mat;
1644   PetscInt       m,nloc,N;
1645 
1646   PetscFunctionBegin;
1647   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
1648   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
1649   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1650     PetscInt sum;
1651 
1652     if (n == PETSC_DECIDE) {
1653       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1654     }
1655     /* Check sum(n) = N */
1656     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
1657     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1658 
1659     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
1660     ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
1661   }
1662 
1663   /* numeric phase */
1664   mat = (Mat_MPIDense*)(*outmat)->data;
1665   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1666   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1667   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1668   PetscFunctionReturn(0);
1669 }
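
/* A minimal usage sketch of the symbolic/numeric reuse pattern implemented above
   (illustrative only; assumes `comm` and a MATSEQDENSE matrix `seqmat` are defined):

     Mat C;
     ierr = MatCreateMPIMatConcatenateSeqMat(comm,seqmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ... update the entries of seqmat ...
     ierr = MatCreateMPIMatConcatenateSeqMat(comm,seqmat,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
*/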
1670 
1671 #if defined(PETSC_HAVE_CUDA)
1672 PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
1673 {
1674   Mat            B;
1675   Mat_MPIDense   *m;
1676   PetscErrorCode ierr;
1677 
1678   PetscFunctionBegin;
1679   if (reuse == MAT_INITIAL_MATRIX) {
1680     ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
1681   } else if (reuse == MAT_REUSE_MATRIX) {
1682     ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1683   }
1684 
1685   B    = *newmat;
1686   ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
1687   ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
1688   ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
1689   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
1690   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
1691   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
1692   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr);
1693   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
1694   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
1695   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
1696   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
1697   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
1698   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
1699   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
1700   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
1701   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
1702   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
1703   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
1704   m    = (Mat_MPIDense*)(B)->data;
1705   if (m->A) {
1706     ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
1707     ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
1708   }
1709   B->ops->bindtocpu = NULL;
1710   B->offloadmask    = PETSC_OFFLOAD_CPU;
1711   PetscFunctionReturn(0);
1712 }
1713 
1714 PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
1715 {
1716   Mat            B;
1717   Mat_MPIDense   *m;
1718   PetscErrorCode ierr;
1719 
1720   PetscFunctionBegin;
1721   if (reuse == MAT_INITIAL_MATRIX) {
1722     ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
1723   } else if (reuse == MAT_REUSE_MATRIX) {
1724     ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1725   }
1726 
1727   B    = *newmat;
1728   ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
1729   ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
1730   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
1731   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
1732   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
1733   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
1734   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
1735   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
1736   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
1737   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
1738   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
1739   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
1740   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
1741   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
1742   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
1743   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
1744   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
1745   m    = (Mat_MPIDense*)(B->data);
1746   if (m->A) {
1747     ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
1748     ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
1749     B->offloadmask = PETSC_OFFLOAD_BOTH;
1750   } else {
1751     B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1752   }
1753   ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);
1754 
1755   B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
1756   PetscFunctionReturn(0);
1757 }
1758 #endif
1759 
1760 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1761 {
1762   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1763   PetscErrorCode ierr;
1764   PetscInt       lda;
1765 
1766   PetscFunctionBegin;
1767   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1768   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1769   if (!a->cvec) {
1770     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1771   }
1772   a->vecinuse = col + 1;
1773   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1774   ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1775   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1776   *v   = a->cvec;
1777   PetscFunctionReturn(0);
1778 }
1779 
1780 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1781 {
1782   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1783   PetscErrorCode ierr;
1784 
1785   PetscFunctionBegin;
1786   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1787   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1788   a->vecinuse = 0;
1789   ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1790   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1791   *v   = NULL;
1792   PetscFunctionReturn(0);
1793 }
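
/* A minimal sketch of the column vector access pair through the public interface
   (illustrative; A is an assembled MATMPIDENSE matrix):

     Vec       v;
     PetscReal nrm;
     ierr = MatDenseGetColumnVec(A,0,&v);CHKERRQ(ierr);
     ierr = VecNorm(v,NORM_2,&nrm);CHKERRQ(ierr);
     ierr = MatDenseRestoreColumnVec(A,0,&v);CHKERRQ(ierr);

   Only one column vector may be outstanding at a time: the vecinuse checks above
   reject a second get before the matching restore. */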
1794 
1795 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1796 {
1797   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1798   PetscErrorCode ierr;
1799   PetscInt       lda;
1800 
1801   PetscFunctionBegin;
1802   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1803   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1804   if (!a->cvec) {
1805     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1806   }
1807   a->vecinuse = col + 1;
1808   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1809   ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1810   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1811   ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
1812   *v   = a->cvec;
1813   PetscFunctionReturn(0);
1814 }
1815 
1816 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1817 {
1818   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1819   PetscErrorCode ierr;
1820 
1821   PetscFunctionBegin;
1822   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVecRead() first");
1823   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1824   a->vecinuse = 0;
1825   ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1826   ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
1827   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1828   *v   = NULL;
1829   PetscFunctionReturn(0);
1830 }
1831 
1832 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1833 {
1834   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1835   PetscErrorCode ierr;
1836   PetscInt       lda;
1837 
1838   PetscFunctionBegin;
1839   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1840   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1841   if (!a->cvec) {
1842     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1843   }
1844   a->vecinuse = col + 1;
1845   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1846   ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1847   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1848   *v   = a->cvec;
1849   PetscFunctionReturn(0);
1850 }
1851 
1852 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1853 {
1854   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1855   PetscErrorCode ierr;
1856 
1857   PetscFunctionBegin;
1858   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVecWrite() first");
1859   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1860   a->vecinuse = 0;
1861   ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1862   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1863   *v   = NULL;
1864   PetscFunctionReturn(0);
1865 }
1866 
1867 PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
1868 {
1869   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1870   Mat_MPIDense   *c;
1871   PetscErrorCode ierr;
1872   MPI_Comm       comm;
1873   PetscBool      setup = PETSC_FALSE;
1874 
1875   PetscFunctionBegin;
1876   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1877   if (a->vecinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1878   if (a->matinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1879   if (!a->cmat) {
1880     setup = PETSC_TRUE;
1881     ierr = MatCreate(comm,&a->cmat);CHKERRQ(ierr);
1882     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr);
1883     ierr = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1884     ierr = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr);
1885     ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
1886     ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
1887   } else if (cend-cbegin != a->cmat->cmap->N) {
1888     setup = PETSC_TRUE;
1889     ierr = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr);
1890     ierr = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr);
1891     ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
1892     ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
1893   }
1894   c = (Mat_MPIDense*)a->cmat->data;
1895   if (c->A) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1896   ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr);
1897   if (setup) { /* do we really need this? */
1898     ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr);
1899   }
1900   a->cmat->preallocated = PETSC_TRUE;
1901   a->cmat->assembled = PETSC_TRUE;
1902   a->matinuse = cbegin + 1;
1903   *v = a->cmat;
1904   PetscFunctionReturn(0);
1905 }
1906 
1907 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
1908 {
1909   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1910   Mat_MPIDense   *c;
1911   PetscErrorCode ierr;
1912 
1913   PetscFunctionBegin;
1914   if (!a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
1915   if (!a->cmat) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
1916   if (*v != a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
1917   a->matinuse = 0;
1918   c    = (Mat_MPIDense*)a->cmat->data;
1919   ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr);
1920   *v   = NULL;
1921   PetscFunctionReturn(0);
1922 }
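
/* A minimal sketch of the submatrix access pair (illustrative; A is an assembled
   MATMPIDENSE matrix with at least two columns):

     Mat S;
     ierr = MatDenseGetSubMatrix(A,0,2,&S);CHKERRQ(ierr);
     ... use S, a view of columns [0,2) of A ...
     ierr = MatDenseRestoreSubMatrix(A,&S);CHKERRQ(ierr);
*/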
1923 
1924 /*MC
1925    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1926 
1927    Options Database Keys:
1928 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1929 
1930   Level: beginner
1931 
1932 .seealso: MatCreateDense()
1933 
1934 M*/
1935 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1936 {
1937   Mat_MPIDense   *a;
1938   PetscErrorCode ierr;
1939 
1940   PetscFunctionBegin;
1941   ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
1942   mat->data = (void*)a;
1943   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1944 
1945   mat->insertmode = NOT_SET_VALUES;
1946 
1947   /* build cache for off array entries formed */
1948   a->donotstash = PETSC_FALSE;
1949 
1950   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1951 
1952   /* stuff used for matrix vector multiply */
1953   a->lvec        = NULL;
1954   a->Mvctx       = NULL;
1955   a->roworiented = PETSC_TRUE;
1956 
1957   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
1958   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr);
1959   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1960   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1961   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
1962   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
1963   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
1964   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
1965   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
1966   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
1967   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
1968   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
1969   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
1970   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
1971   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
1972   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
1973   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
1974   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr);
1975   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr);
1976   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr);
1977   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",MatConvert_MPIDense_MPIAIJ);CHKERRQ(ierr);
1978 #if defined(PETSC_HAVE_ELEMENTAL)
1979   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
1980 #endif
1981 #if defined(PETSC_HAVE_SCALAPACK)
1982   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK);CHKERRQ(ierr);
1983 #endif
1984 #if defined(PETSC_HAVE_CUDA)
1985   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
1986 #endif
1987   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1988   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
1989   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
1990 #if defined(PETSC_HAVE_CUDA)
1991   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
1992   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
1993 #endif
1994 
1995   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
1996   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
1997   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1998   PetscFunctionReturn(0);
1999 }
2000 
2001 /*MC
2002    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
2003 
2004    Options Database Keys:
2005 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
2006 
2007   Level: beginner
2008 
2009 .seealso: MATMPIDENSE, MATDENSECUDA, MATSEQDENSECUDA
2010 
2011 M*/
2012 #if defined(PETSC_HAVE_CUDA)
2013 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
2014 {
2015   PetscErrorCode ierr;
2016 
2017   PetscFunctionBegin;
2018   ierr = PetscCUDAInitializeCheck();CHKERRQ(ierr);
2019   ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
2020   ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
2021   PetscFunctionReturn(0);
2022 }
2023 #endif
2024 
2025 /*MC
2026    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
2027 
2028    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
2029    and MATMPIDENSE otherwise.
2030 
2031    Options Database Keys:
2032 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
2033 
2034   Level: beginner
2035 
2036 .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
2037 M*/
2038 
2039 /*MC
2040    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
2041 
2042    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
2043    and MATMPIDENSECUDA otherwise.
2044 
2045    Options Database Keys:
2046 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
2047 
2048   Level: beginner
2049 
2050 .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
2051 M*/
2052 
2053 /*@C
2054    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
2055 
2056    Collective
2057 
2058    Input Parameters:
2059 +  B - the matrix
2060 -  data - optional location of matrix data.  Set data=NULL for PETSc
2061    to control all matrix memory allocation.
2062 
2063    Notes:
2064    The dense format is fully compatible with standard Fortran 77
2065    storage by columns.
2066 
2067    The data input variable is intended primarily for Fortran programmers
2068    who wish to allocate their own matrix memory space.  Most users should
2069    set data=NULL.
2070 
2071    Level: intermediate
2072 
2073 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
2074 @*/
2075 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
2076 {
2077   PetscErrorCode ierr;
2078 
2079   PetscFunctionBegin;
2080   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2081   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
2082   PetscFunctionReturn(0);
2083 }
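
/* A minimal preallocation sketch (illustrative; assumes `comm`, `m` and `N` are defined;
   passing data=NULL lets PETSc allocate the storage):

     Mat B;
     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,m,PETSC_DECIDE,PETSC_DECIDE,N);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
*/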
2084 
2085 /*@
2086    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
2087    array provided by the user. This is useful to avoid copying an array
2088    into a matrix.
2089 
2090    Not Collective
2091 
2092    Input Parameters:
2093 +  mat - the matrix
2094 -  array - the array in column major order
2095 
2096    Notes:
2097    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
2098    freed when the matrix is destroyed.
2099 
2100    Level: developer
2101 
2102 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
2103 
2104 @*/
2105 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
2106 {
2107   PetscErrorCode ierr;
2108 
2109   PetscFunctionBegin;
2110   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2111   ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2112   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2113 #if defined(PETSC_HAVE_CUDA)
2114   mat->offloadmask = PETSC_OFFLOAD_CPU;
2115 #endif
2116   PetscFunctionReturn(0);
2117 }
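
/* A minimal place/reset sketch (illustrative; `mydata` is a user-owned array, in
   column major order, large enough for the local part of the matrix):

     ierr = MatDensePlaceArray(mat,mydata);CHKERRQ(ierr);
     ... operations on mat now read and write mydata ...
     ierr = MatDenseResetArray(mat);CHKERRQ(ierr);
*/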
2118 
2119 /*@
2120    MatDenseResetArray - Resets the matrix array to the array that it previously had before the call to MatDensePlaceArray()
2121 
2122    Not Collective
2123 
2124    Input Parameter:
2125 .  mat - the matrix
2126 
2127    Notes:
2128    You can only call this after a call to MatDensePlaceArray().
2129 
2130    Level: developer
2131 
2132 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
2133 
2134 @*/
2135 PetscErrorCode  MatDenseResetArray(Mat mat)
2136 {
2137   PetscErrorCode ierr;
2138 
2139   PetscFunctionBegin;
2140   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2141   ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
2142   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2143   PetscFunctionReturn(0);
2144 }
2145 
2146 /*@
2147    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2148    array provided by the user. This is useful to avoid copying an array
2149    into a matrix.
2150 
2151    Not Collective
2152 
2153    Input Parameters:
2154 +  mat - the matrix
2155 -  array - the array in column major order
2156 
2157    Notes:
2158    The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
2159    freed by the user. It will be freed when the matrix is destroyed.
2160 
2161    Level: developer
2162 
2163 .seealso: MatDenseGetArray(), VecReplaceArray()
2164 @*/
2165 PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
2166 {
2167   PetscErrorCode ierr;
2168 
2169   PetscFunctionBegin;
2170   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2171   ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2172   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2173 #if defined(PETSC_HAVE_CUDA)
2174   mat->offloadmask = PETSC_OFFLOAD_CPU;
2175 #endif
2176   PetscFunctionReturn(0);
2177 }
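
/* A minimal replace sketch (illustrative; `lda` and `n` stand for the local leading
   dimension and number of columns); the new array must come from PetscMalloc() since
   the matrix takes ownership and frees it on destruction:

     PetscScalar *newarr;
     ierr = PetscMalloc1(lda*n,&newarr);CHKERRQ(ierr);
     ... fill newarr in column major order ...
     ierr = MatDenseReplaceArray(mat,newarr);CHKERRQ(ierr);
*/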
2178 
2179 #if defined(PETSC_HAVE_CUDA)
2180 /*@C
2181    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
2182    array provided by the user. This is useful to avoid copying an array
2183    into a matrix.
2184 
2185    Not Collective
2186 
2187    Input Parameters:
2188 +  mat - the matrix
2189 -  array - the array in column major order
2190 
2191    Notes:
2192    You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
2193    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
2194 
2195    Level: developer
2196 
2197 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
2198 @*/
2199 PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
2200 {
2201   PetscErrorCode ierr;
2202 
2203   PetscFunctionBegin;
2204   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2205   ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2206   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2207   mat->offloadmask = PETSC_OFFLOAD_GPU;
2208   PetscFunctionReturn(0);
2209 }
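
/* A minimal CUDA place/reset sketch (illustrative; `nbytes` is the size of the local
   array); the placed array must be device memory obtained with cudaMalloc():

     PetscScalar *d_arr;
     cudaError_t cerr;

     cerr = cudaMalloc((void**)&d_arr,nbytes);CHKERRCUDA(cerr);
     ierr = MatDenseCUDAPlaceArray(mat,d_arr);CHKERRQ(ierr);
     ... GPU operations on mat now use d_arr ...
     ierr = MatDenseCUDAResetArray(mat);CHKERRQ(ierr);
     cerr = cudaFree(d_arr);CHKERRCUDA(cerr);
*/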
2210 
2211 /*@C
2212    MatDenseCUDAResetArray - Resets the matrix array to the array that it previously had before the call to MatDenseCUDAPlaceArray()
2213 
2214    Not Collective
2215 
2216    Input Parameter:
2217 .  mat - the matrix
2218 
2219    Notes:
2220    You can only call this after a call to MatDenseCUDAPlaceArray().
2221 
2222    Level: developer
2223 
2224 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()
2225 
2226 @*/
2227 PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
2228 {
2229   PetscErrorCode ierr;
2230 
2231   PetscFunctionBegin;
2232   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2233   ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
2234   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2235   PetscFunctionReturn(0);
2236 }
2237 
2238 /*@C
2239    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
2240    array provided by the user. This is useful to avoid copying an array
2241    into a matrix.
2242 
2243    Not Collective
2244 
2245    Input Parameters:
2246 +  mat - the matrix
2247 -  array - the array in column major order
2248 
2249    Notes:
2250    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2251    The memory passed in CANNOT be freed by the user. It will be freed
2252    when the matrix is destroyed. The array should respect the matrix leading dimension.
2253 
2254    Level: developer
2255 
2256 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
2257 @*/
2258 PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
2259 {
2260   PetscErrorCode ierr;
2261 
2262   PetscFunctionBegin;
2263   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2264   ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2265   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2266   mat->offloadmask = PETSC_OFFLOAD_GPU;
2267   PetscFunctionReturn(0);
2268 }
2269 
2270 /*@C
2271    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.
2272 
2273    Not Collective
2274 
2275    Input Parameter:
2276 .  A - the matrix
2277 
2278    Output Parameter:
2279 .  array - the GPU array in column major order
2280 
2281    Notes:
2282    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
2283 
2284    Level: developer
2285 
2286 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
2287 @*/
2288 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
2289 {
2290   PetscErrorCode ierr;
2291 
2292   PetscFunctionBegin;
2293   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2294   ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2295   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2296   PetscFunctionReturn(0);
2297 }
2298 
2299 /*@C
2300    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().
2301 
2302    Not Collective
2303 
2304    Input Parameters:
2305 +  A - the matrix
2306 -  array - the GPU array in column major order
2307 
2308    Notes:
2309    Upon restore, the matrix is marked as having up-to-date values on the GPU (its offload mask is set to PETSC_OFFLOAD_GPU).

2310    Level: developer
2311 
2312 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2313 @*/
2314 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
2315 {
2316   PetscErrorCode ierr;
2317 
2318   PetscFunctionBegin;
2319   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2320   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2321   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2322   A->offloadmask = PETSC_OFFLOAD_GPU;
2323   PetscFunctionReturn(0);
2324 }
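
/* A minimal write-access sketch (illustrative): obtain the device pointer, fill it
   (e.g. with a kernel launch or a cuBLAS call), then restore:

     PetscScalar *d_a;
     ierr = MatDenseCUDAGetArrayWrite(A,&d_a);CHKERRQ(ierr);
     ... launch a kernel that overwrites d_a ...
     ierr = MatDenseCUDARestoreArrayWrite(A,&d_a);CHKERRQ(ierr);
*/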
2325 
2326 /*@C
2327    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.
2328 
2329    Not Collective
2330 
2331    Input Parameter:
2332 .  A - the matrix
2333 
2334    Output Parameter:
2335 .  array - the GPU array in column major order
2336 
2337    Notes:
2338    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2339 
2340    Level: developer
2341 
2342 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2343 @*/
2344 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
2345 {
2346   PetscErrorCode ierr;
2347 
2348   PetscFunctionBegin;
2349   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2350   ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2351   PetscFunctionReturn(0);
2352 }
2353 
2354 /*@C
2355    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2356 
2357    Not Collective
2358 
2359    Input Parameters:
2360 +  A - the matrix
2361 -  array - the GPU array in column major order
2362 
2363    Notes:
2364    Read-only access cannot have modified the GPU values, so the offload state of the matrix is left unchanged.
2365 
2366    Level: developer
2367 
2368 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
2369 @*/
2370 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2371 {
2372   PetscErrorCode ierr;
2373 
2374   PetscFunctionBegin;
2374   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2375   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2376   PetscFunctionReturn(0);
2377 }
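
/* A minimal read-access sketch (illustrative); the pointer is const and the offload
   state of the matrix is not altered:

     const PetscScalar *d_a;
     ierr = MatDenseCUDAGetArrayRead(A,&d_a);CHKERRQ(ierr);
     ... pass d_a to a kernel or library routine that only reads it ...
     ierr = MatDenseCUDARestoreArrayRead(A,&d_a);CHKERRQ(ierr);
*/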
2378 
2379 /*@C
2380    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.
2381 
2382    Not Collective
2383 
2384    Input Parameter:
2385 .  A - the matrix
2386 
2387    Output Parameter:
2388 .  array - the GPU array in column major order
2389 
2390    Notes:
2391    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().
2392 
2393    Level: developer
2394 
2395 .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2396 @*/
2397 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
2398 {
2399   PetscErrorCode ierr;
2400 
2401   PetscFunctionBegin;
2402   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2403   ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2404   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2405   PetscFunctionReturn(0);
2406 }
2407 
2408 /*@C
2409    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().
2410 
2411    Not Collective
2412 
2413    Input Parameters:
2414 +  A - the matrix
2415 -  array - the GPU array in column major order
2416 
2417    Notes:
2418    Upon restore, the matrix is marked as having up-to-date values on the GPU (its offload mask is set to PETSC_OFFLOAD_GPU).

2419    Level: developer
2420 
2421 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2422 @*/
2423 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
2424 {
2425   PetscErrorCode ierr;
2426 
2427   PetscFunctionBegin;
2428   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2429   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2430   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2431   A->offloadmask = PETSC_OFFLOAD_GPU;
2432   PetscFunctionReturn(0);
2433 }
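
/* A minimal read-write access sketch (illustrative); after the restore the GPU copy
   is considered the up-to-date one:

     PetscScalar *d_a;
     ierr = MatDenseCUDAGetArray(A,&d_a);CHKERRQ(ierr);
     ... update entries of d_a in place on the GPU ...
     ierr = MatDenseCUDARestoreArray(A,&d_a);CHKERRQ(ierr);
*/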
2434 #endif
2435 
2436 /*@C
2437    MatCreateDense - Creates a matrix in dense format.
2438 
2439    Collective
2440 
2441    Input Parameters:
2442 +  comm - MPI communicator
2443 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2444 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2445 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
2446 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
2447 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2448    to control all matrix memory allocation.
2449 
2450    Output Parameter:
2451 .  A - the matrix
2452 
2453    Notes:
2454    The dense format is fully compatible with standard Fortran 77
2455    storage by columns.
2456    Note that, although local portions of the matrix are stored in column-major
2457    order, the matrix is partitioned across MPI ranks by row.
2458 
2459    The data input variable is intended primarily for Fortran programmers
2460    who wish to allocate their own matrix memory space.  Most users should
2461    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
2462 
2463    The user MUST specify either the local or global matrix dimensions
2464    (possibly both).
2465 
2466    Level: intermediate
2467 
2468 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
2469 @*/
2470 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2471 {
2472   PetscErrorCode ierr;
2473   PetscMPIInt    size;
2474 
2475   PetscFunctionBegin;
2476   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2477   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2478   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
2479   if (size > 1) {
2480     PetscBool havedata = (PetscBool)!!data;
2481 
2482     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
2483     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
2484     ierr = MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm);CHKERRMPI(ierr);
2485     if (havedata) {  /* user provided data array, so no need to assemble */
2486       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
2487       (*A)->assembled = PETSC_TRUE;
2488     }
2489   } else {
2490     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
2491     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
2492   }
2493   PetscFunctionReturn(0);
2494 }
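
/* A minimal creation sketch (illustrative; assumes `comm`, `m` and `N` are defined):

     Mat A;
     ierr = MatCreateDense(comm,m,PETSC_DECIDE,PETSC_DECIDE,N,NULL,&A);CHKERRQ(ierr);
     ... set entries with MatSetValues() ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/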
2495 
2496 #if defined(PETSC_HAVE_CUDA)
2497 /*@C
2498    MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.
2499 
2500    Collective
2501 
2502    Input Parameters:
2503 +  comm - MPI communicator
2504 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2505 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2506 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2507 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2508 -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
2509    to control matrix memory allocation.
2510 
2511    Output Parameter:
2512 .  A - the matrix
2513 
   Notes:
   If data is provided, it is expected to point to GPU (device) memory. As with
   MatCreateDense(), the user MUST specify either the local or global matrix
   dimensions (possibly both).
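
   A minimal usage sketch, assuming dptr is a previously allocated device array
   holding the local m x n block in column-major order:
.vb
   Mat         A;
   PetscScalar *dptr; /* assumed: device memory managed by the caller */
   MatCreateDenseCUDA(PETSC_COMM_WORLD,m,n,PETSC_DECIDE,PETSC_DECIDE,dptr,&A);
.ve
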
2516    Level: intermediate
2517 
2518 .seealso: MatCreate(), MatCreateDense()
2519 @*/
2520 PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2521 {
2522   PetscErrorCode ierr;
2523   PetscMPIInt    size;
2524 
2525   PetscFunctionBegin;
2526   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2527   PetscValidLogicalCollectiveBool(*A,!!data,6);
2528   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2529   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
2530   if (size > 1) {
2531     ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
2532     ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
2533     if (data) {  /* user provided data array, so no need to assemble */
2534       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
2535       (*A)->assembled = PETSC_TRUE;
2536     }
2537   } else {
2538     ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
2539     ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
2540   }
2541   PetscFunctionReturn(0);
2542 }
2543 #endif
2544 
2545 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
2546 {
2547   Mat            mat;
2548   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
2549   PetscErrorCode ierr;
2550 
2551   PetscFunctionBegin;
2552   *newmat = NULL;
2553   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
2554   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2555   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
2556   a       = (Mat_MPIDense*)mat->data;
2557 
2558   mat->factortype   = A->factortype;
2559   mat->assembled    = PETSC_TRUE;
2560   mat->preallocated = PETSC_TRUE;
2561 
2562   mat->insertmode = NOT_SET_VALUES;
2563   a->donotstash   = oldmat->donotstash;
2564 
2565   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
2566   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
2567 
2568   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2569   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2570   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
2571 
2572   *newmat = mat;
2573   PetscFunctionReturn(0);
2574 }
2575 
2576 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
2577 {
2578   PetscErrorCode ierr;
2579   PetscBool      isbinary;
2580 #if defined(PETSC_HAVE_HDF5)
2581   PetscBool      ishdf5;
2582 #endif
2583 
2584   PetscFunctionBegin;
2585   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2586   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2587   /* force binary viewer to load .info file if it has not yet done so */
2588   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2589   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2590 #if defined(PETSC_HAVE_HDF5)
2591   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2592 #endif
2593   if (isbinary) {
2594     ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
2595 #if defined(PETSC_HAVE_HDF5)
2596   } else if (ishdf5) {
2597     ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
2598 #endif
2599   } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2600   PetscFunctionReturn(0);
2601 }
2602 
2603 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
2604 {
2605   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2606   Mat            a,b;
2607   PetscBool      flg;
2608   PetscErrorCode ierr;
2609 
2610   PetscFunctionBegin;
2611   a    = matA->A;
2612   b    = matB->A;
2613   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2614   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2615   PetscFunctionReturn(0);
2616 }
2617 
2618 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2619 {
2620   PetscErrorCode        ierr;
2621   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2622 
2623   PetscFunctionBegin;
2624   ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
2625   ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
2626   ierr = PetscFree(atb);CHKERRQ(ierr);
2627   PetscFunctionReturn(0);
2628 }
2629 
2630 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2631 {
2632   PetscErrorCode        ierr;
2633   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2634 
2635   PetscFunctionBegin;
2636   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
2637   ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
2638   ierr = PetscFree(abt);CHKERRQ(ierr);
2639   PetscFunctionReturn(0);
2640 }
2641 
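/*
   C = A^T * B: each rank computes the full-size product of its local blocks,
   (A_loc)^T * B_loc, and MPI_Reduce_scatter then sums these contributions while
   handing each rank its own row block of C.
*/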
2642 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2643 {
2644   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2645   Mat_TransMatMultDense *atb;
2646   PetscErrorCode        ierr;
2647   MPI_Comm              comm;
2648   PetscMPIInt           size,*recvcounts;
2649   PetscScalar           *carray,*sendbuf;
2650   const PetscScalar     *atbarray;
2651   PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
2652   const PetscInt        *ranges;
2653 
2654   PetscFunctionBegin;
2655   MatCheckProduct(C,3);
2656   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2657   atb = (Mat_TransMatMultDense *)C->product->data;
2658   recvcounts = atb->recvcounts;
2659   sendbuf = atb->sendbuf;
2660 
2661   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2662   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
2663 
2664   /* compute atbarray = aseq^T * bseq */
2665   ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);
2666 
2667   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
2668 
  /* pack atbarray into sendbuf so that each rank's destined row block is contiguous, one column of C at a time */
2670   ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
2671   for (proc=0, k=0; proc<size; proc++) {
2672     for (j=0; j<cN; j++) {
2673       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
2674     }
2675   }
2676   ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
2677 
  /* reduce-scatter: sum the contributions from all ranks and deliver to each rank its local rows of C */
2679   ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr);
2680   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRMPI(ierr);
2681   ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr);
2682   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2683   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2684   PetscFunctionReturn(0);
2685 }
2686 
2687 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2688 {
2689   PetscErrorCode        ierr;
2690   MPI_Comm              comm;
2691   PetscMPIInt           size;
2692   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
2693   Mat_TransMatMultDense *atb;
2694   PetscBool             cisdense;
2695   PetscInt              i;
2696   const PetscInt        *ranges;
2697 
2698   PetscFunctionBegin;
2699   MatCheckProduct(C,3);
2700   if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2701   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2702   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2703     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2704   }
2705 
2706   /* create matrix product C */
2707   ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr);
2708   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
2709   if (!cisdense) {
2710     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
2711   }
2712   ierr = MatSetUp(C);CHKERRQ(ierr);
2713 
  /* create data structure for reuse of C */
2715   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
2716   ierr = PetscNew(&atb);CHKERRQ(ierr);
2717   cM   = C->rmap->N;
2718   ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);
2719   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
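  /* rank i will receive its (ranges[i+1]-ranges[i]) x cN row block of C from the reduce-scatter in the numeric phase */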
2720   for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
2721 
2722   C->product->data    = atb;
2723   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2724   PetscFunctionReturn(0);
2725 }
2726 
2727 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2728 {
2729   PetscErrorCode        ierr;
2730   MPI_Comm              comm;
2731   PetscMPIInt           i, size;
2732   PetscInt              maxRows, bufsiz;
2733   PetscMPIInt           tag;
2734   PetscInt              alg;
2735   Mat_MatTransMultDense *abt;
2736   Mat_Product           *product = C->product;
2737   PetscBool             flg;
2738 
2739   PetscFunctionBegin;
2740   MatCheckProduct(C,4);
2741   if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2742   /* check local size of A and B */
2743   if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);
2744 
2745   ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
2746   alg  = flg ? 0 : 1;
2747 
2748   /* setup matrix product C */
2749   ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
2750   ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
2751   ierr = MatSetUp(C);CHKERRQ(ierr);
2752   ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr);
2753 
  /* create data structure for reuse of C */
2755   ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
2756   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
2757   ierr = PetscNew(&abt);CHKERRQ(ierr);
2758   abt->tag = tag;
2759   abt->alg = alg;
2760   switch (alg) {
2761   case 1: /* alg: "cyclic" */
2762     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2763     bufsiz = A->cmap->N * maxRows;
2764     ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
2765     break;
2766   default: /* alg: "allgatherv" */
2767     ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
2768     ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
2769     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2770     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2771     break;
2772   }
2773 
2774   C->product->data    = abt;
2775   C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2776   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
2777   PetscFunctionReturn(0);
2778 }
2779 
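/*
   C = A * B^T via a ring exchange: the local rows of B cycle around the ranks;
   in each step the local part of A is multiplied against the block currently in
   sendbuf while the next block is already in flight, overlapping communication
   with the local GEMMs.
*/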
2780 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2781 {
2782   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2783   Mat_MatTransMultDense *abt;
2784   PetscErrorCode        ierr;
2785   MPI_Comm              comm;
2786   PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2787   PetscScalar           *sendbuf, *recvbuf=NULL, *cv;
2788   PetscInt              i,cK=A->cmap->N,k,j,bn;
2789   PetscScalar           _DOne=1.0,_DZero=0.0;
2790   const PetscScalar     *av,*bv;
2791   PetscBLASInt          cm, cn, ck, alda, blda = 0, clda;
2792   MPI_Request           reqs[2];
2793   const PetscInt        *ranges;
2794 
2795   PetscFunctionBegin;
2796   MatCheckProduct(C,3);
2797   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2798   abt  = (Mat_MatTransMultDense*)C->product->data;
2799   ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
2800   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
2801   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
2802   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
2803   ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
2804   ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
2805   ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
2806   ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
2807   ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
2808   ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
2809   ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
2810   ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
2811   ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
2812   bn   = B->rmap->n;
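  /* if the local array of B has no column padding (lda equals the local row count), bv can be sent as-is; otherwise compact it into buf[0] */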
2813   if (blda == bn) {
2814     sendbuf = (PetscScalar*)bv;
2815   } else {
2816     sendbuf = abt->buf[0];
2817     for (k = 0, i = 0; i < cK; i++) {
2818       for (j = 0; j < bn; j++, k++) {
2819         sendbuf[k] = bv[i * blda + j];
2820       }
2821     }
2822   }
2823   if (size > 1) {
2824     sendto = (rank + size - 1) % size;
2825     recvfrom = (rank + size + 1) % size;
2826   } else {
2827     sendto = recvfrom = 0;
2828   }
2829   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2830   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2831   recvisfrom = rank;
2832   for (i = 0; i < size; i++) {
    /* we have finished receiving and sending; the buffers can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* the process from which the next recvbuf originates */
2835     PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2836 
2837     if (nextrecvisfrom != rank) {
      /* start the cyclic exchange: send from sendbuf while receiving into recvbuf (which becomes the next sendbuf) */
2839       sendsiz = cK * bn;
2840       recvsiz = cK * nextbn;
2841       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2842       ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRMPI(ierr);
2843       ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRMPI(ierr);
2844     }
2845 
2846     /* local aseq * sendbuf^T */
2847     ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
2848     if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));
2849 
2850     if (nextrecvisfrom != rank) {
2851       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2852       ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
2853     }
2854     bn = nextbn;
2855     recvisfrom = nextrecvisfrom;
2856     sendbuf = recvbuf;
2857   }
2858   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
2859   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2860   ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
2861   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2862   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2863   PetscFunctionReturn(0);
2864 }
2865 
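/*
   C = A * B^T via allgather: each rank packs the transpose of its local rows of
   B, MPI_Allgatherv assembles the full B^T on every rank, and a single local
   GEMM then produces the owned rows of C.
*/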
2866 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2867 {
2868   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2869   Mat_MatTransMultDense *abt;
2870   PetscErrorCode        ierr;
2871   MPI_Comm              comm;
2872   PetscMPIInt           size;
2873   PetscScalar           *cv, *sendbuf, *recvbuf;
2874   const PetscScalar     *av,*bv;
2875   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2876   PetscScalar           _DOne=1.0,_DZero=0.0;
2877   PetscBLASInt          cm, cn, ck, alda, clda;
2878 
2879   PetscFunctionBegin;
2880   MatCheckProduct(C,3);
2881   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2882   abt  = (Mat_MatTransMultDense*)C->product->data;
2883   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2884   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
2885   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
2886   ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
2887   ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
2888   ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
2889   ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
2890   ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr);
2891   ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
2892   ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
2893   /* copy transpose of B into buf[0] */
2894   bn      = B->rmap->n;
2895   sendbuf = abt->buf[0];
2896   recvbuf = abt->buf[1];
2897   for (k = 0, j = 0; j < bn; j++) {
2898     for (i = 0; i < cK; i++, k++) {
2899       sendbuf[k] = bv[i * blda + j];
2900     }
2901   }
2902   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2903   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRMPI(ierr);
2904   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2905   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2906   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
2907   if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2908   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
2910   ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
2911   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2912   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2913   PetscFunctionReturn(0);
2914 }
2915 
2916 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2917 {
2918   Mat_MatTransMultDense *abt;
2919   PetscErrorCode        ierr;
2920 
2921   PetscFunctionBegin;
2922   MatCheckProduct(C,3);
2923   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2924   abt = (Mat_MatTransMultDense*)C->product->data;
2925   switch (abt->alg) {
2926   case 1:
2927     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2928     break;
2929   default:
2930     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2931     break;
2932   }
2933   PetscFunctionReturn(0);
2934 }
2935 
2936 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2937 {
2938   PetscErrorCode   ierr;
2939   Mat_MatMultDense *ab = (Mat_MatMultDense*)data;
2940 
2941   PetscFunctionBegin;
2942   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2943   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2944   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2945   ierr = PetscFree(ab);CHKERRQ(ierr);
2946   PetscFunctionReturn(0);
2947 }
2948 
2949 #if defined(PETSC_HAVE_ELEMENTAL)
2950 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2951 {
2952   PetscErrorCode   ierr;
2953   Mat_MatMultDense *ab;
2954 
2955   PetscFunctionBegin;
2956   MatCheckProduct(C,3);
2957   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
2958   ab   = (Mat_MatMultDense*)C->product->data;
2959   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
2960   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
2961   ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
2962   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
2963   PetscFunctionReturn(0);
2964 }
2965 
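/*
   Symbolic C = A * B through Elemental: the MATELEMENTAL mirrors Ae and Be are
   created once here; each numeric call refreshes their values from A and B and
   converts the Elemental product Ce back into the MATMPIDENSE matrix C.
*/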
2966 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2967 {
2968   PetscErrorCode   ierr;
2969   Mat              Ae,Be,Ce;
2970   Mat_MatMultDense *ab;
2971 
2972   PetscFunctionBegin;
2973   MatCheckProduct(C,4);
2974   if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2975   /* check local size of A and B */
2976   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
2978   }
2979 
2980   /* create elemental matrices Ae and Be */
2981   ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr);
2982   ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2983   ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr);
2984   ierr = MatSetUp(Ae);CHKERRQ(ierr);
2985   ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2986 
2987   ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr);
2988   ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
2989   ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
2990   ierr = MatSetUp(Be);CHKERRQ(ierr);
2991   ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2992 
2993   /* compute symbolic Ce = Ae*Be */
2994   ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr);
2995   ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr);
2996 
2997   /* setup C */
2998   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2999   ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr);
3000   ierr = MatSetUp(C);CHKERRQ(ierr);
3001 
  /* create data structure for reuse of C */
3003   ierr = PetscNew(&ab);CHKERRQ(ierr);
3004   ab->Ae = Ae;
3005   ab->Be = Be;
3006   ab->Ce = Ce;
3007 
3008   C->product->data    = ab;
3009   C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
3010   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
3011   PetscFunctionReturn(0);
3012 }
3013 #endif
3014 /* ----------------------------------------------- */
3015 #if defined(PETSC_HAVE_ELEMENTAL)
3016 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
3017 {
3018   PetscFunctionBegin;
3019   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
3020   C->ops->productsymbolic = MatProductSymbolic_AB;
3021   PetscFunctionReturn(0);
3022 }
3023 #endif
3024 
3025 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
3026 {
3027   Mat_Product *product = C->product;
3028   Mat         A = product->A,B=product->B;
3029 
3030   PetscFunctionBegin;
3031   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
3032     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
3033   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
3034   C->ops->productsymbolic = MatProductSymbolic_AtB;
3035   PetscFunctionReturn(0);
3036 }
3037 
3038 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
3039 {
3040   PetscErrorCode ierr;
3041   Mat_Product    *product = C->product;
3042   const char     *algTypes[2] = {"allgatherv","cyclic"};
3043   PetscInt       alg,nalg = 2;
3044   PetscBool      flg = PETSC_FALSE;
3045 
3046   PetscFunctionBegin;
3047   /* Set default algorithm */
3048   alg = 0; /* default is allgatherv */
3049   ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
3050   if (flg) {
3051     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
3052   }
3053 
3054   /* Get runtime option */
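  /* e.g. -matmattransmult_mpidense_mpidense_via cyclic (MatMatTransposeMult() interface)
     or -matproduct_abt_mpidense_mpidense_via cyclic (MatProduct interface) */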
3055   if (product->api_user) {
3056     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
3057     ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
3058     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3059   } else {
3060     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
3061     ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
3062     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3063   }
3064   if (flg) {
3065     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
3066   }
3067 
3068   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
3069   C->ops->productsymbolic          = MatProductSymbolic_ABt;
3070   PetscFunctionReturn(0);
3071 }
3072 
3073 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
3074 {
3075   PetscErrorCode ierr;
3076   Mat_Product    *product = C->product;
3077 
3078   PetscFunctionBegin;
3079   switch (product->type) {
3080 #if defined(PETSC_HAVE_ELEMENTAL)
3081   case MATPRODUCT_AB:
3082     ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
3083     break;
3084 #endif
3085   case MATPRODUCT_AtB:
3086     ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
3087     break;
3088   case MATPRODUCT_ABt:
3089     ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
3090     break;
3091   default:
3092     break;
3093   }
3094   PetscFunctionReturn(0);
3095 }
3096