xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision a2aba86c77ac869ca1007cc1e6f5ae9e8649f479)
1 
/*
   Basic functions for parallel dense matrices.
*/
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11 
12       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
13               matrix that represents the operator. For sequential matrices it returns itself.
14 
15     Input Parameter:
16 .      A - the Seq or MPI dense matrix
17 
18     Output Parameter:
19 .      B - the inner matrix
20 
21     Level: intermediate
22 
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25 {
26   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
27   PetscErrorCode ierr;
28   PetscBool      flg;
29 
30   PetscFunctionBegin;
31   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
32   PetscValidPointer(B,2);
33   ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
34   if (flg) *B = mat->A;
35   else {
36     ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg);CHKERRQ(ierr);
37     PetscCheckFalse(!flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
38     *B = A;
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
44 {
45   Mat_MPIDense   *Amat = (Mat_MPIDense*)A->data;
46   Mat_MPIDense   *Bmat = (Mat_MPIDense*)B->data;
47   PetscErrorCode ierr;
48 
49   PetscFunctionBegin;
50   ierr = MatCopy(Amat->A,Bmat->A,s);CHKERRQ(ierr);
51   PetscFunctionReturn(0);
52 }
53 
54 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
55 {
56   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
57   PetscErrorCode ierr;
58   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
59 
60   PetscFunctionBegin;
61   PetscCheckFalse(row < rstart || row >= rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
62   lrow = row - rstart;
63   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
64   PetscFunctionReturn(0);
65 }
66 
67 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
68 {
69   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
70   PetscErrorCode ierr;
71   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
72 
73   PetscFunctionBegin;
74   PetscCheckFalse(row < rstart || row >= rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
75   lrow = row - rstart;
76   ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
77   PetscFunctionReturn(0);
78 }
79 
/*
   Returns (borrowed reference) the square on-process diagonal block of A as a
   sequential dense matrix. The block is created lazily on first call, aliases
   the local storage (no copy), and is cached on A under the key "DiagonalBlock".
*/
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr);
  PetscCheckFalse(!flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr);
    PetscCheckFalse(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    /* B shares the storage of the local block starting at column rstart.
       NOTE(review): the offset m*rstart assumes the local block has lda == m -- confirm */
    ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* cache B on A; after the compose + destroy the composed reference is the only one */
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
113 
/*
   Inserts or adds a logically dense block of values. Locally-owned rows go
   straight into the local sequential block; off-process rows are stashed and
   communicated during MatAssemblyBegin/End() (unless stashing is disabled).
   v is row-major (m x n) when roworiented, column-major otherwise.
*/
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are ignored */
    PetscCheckFalse(idxm[i] >= mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* row i of v is contiguous: set the whole row in one call */
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* column-major v: entry (i,j) lives at v[i + j*m], set one value at a time */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheckFalse(idxn[j] >= mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      /* off-process row: mark unassembled and queue the values for communication */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
147 
148 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
149 {
150   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
151   PetscErrorCode ierr;
152   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
153 
154   PetscFunctionBegin;
155   for (i=0; i<m; i++) {
156     if (idxm[i] < 0) continue; /* negative row */
157     PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
158     if (idxm[i] >= rstart && idxm[i] < rend) {
159       row = idxm[i] - rstart;
160       for (j=0; j<n; j++) {
161         if (idxn[j] < 0) continue; /* negative column */
162         PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
163         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
164       }
165     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
166   }
167   PetscFunctionReturn(0);
168 }
169 
170 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
171 {
172   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
173   PetscErrorCode ierr;
174 
175   PetscFunctionBegin;
176   ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
177   PetscFunctionReturn(0);
178 }
179 
/*
   Sets the leading dimension of the local storage. If the inner sequential
   block does not exist yet it is created here (lazily), matching the CUDA
   flavor of the parallel matrix, so the LDA can be set before preallocation.
*/
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
    ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
    /* local block is n_local x N_global: each rank stores all columns of its rows */
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->A);CHKERRQ(ierr);
    ierr = MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
    ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  }
  ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
200 
201 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
202 {
203   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
204   PetscErrorCode ierr;
205 
206   PetscFunctionBegin;
207   PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
208   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
209   PetscFunctionReturn(0);
210 }
211 
212 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
213 {
214   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
215   PetscErrorCode ierr;
216 
217   PetscFunctionBegin;
218   PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
219   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
220   PetscFunctionReturn(0);
221 }
222 
223 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
224 {
225   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
226   PetscErrorCode ierr;
227 
228   PetscFunctionBegin;
229   PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
230   ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr);
231   PetscFunctionReturn(0);
232 }
233 
234 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
235 {
236   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
237   PetscErrorCode ierr;
238 
239   PetscFunctionBegin;
240   PetscCheckFalse(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
241   PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
242   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
243   PetscFunctionReturn(0);
244 }
245 
246 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
247 {
248   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
249   PetscErrorCode ierr;
250 
251   PetscFunctionBegin;
252   PetscCheckFalse(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
253   PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
254   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
255   PetscFunctionReturn(0);
256 }
257 
258 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
259 {
260   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
261   PetscErrorCode ierr;
262 
263   PetscFunctionBegin;
264   PetscCheckFalse(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
265   PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
266   ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr);
267   PetscFunctionReturn(0);
268 }
269 
/*
   Extracts the (isrow, iscol) submatrix into a parallel dense matrix.
   The row partitioning of the result matches that of the original matrix (no
   parallel redistribution); iscol is gathered onto every process so each rank
   can copy all requested columns of its locally-owned rows.
*/
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
  PetscErrorCode    ierr;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  PetscCheckFalse(comm_mat != comm_is,PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); /* queried but currently unused */
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time.
     NOTE(review): bv is advanced contiguously, which assumes the destination
     local block has leading dimension equal to nrows -- confirm for reused matrices */
  newmatd = (Mat_MPIDense*)newmat->data;
  ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr);
  for (i=0; i<Ncols; i++) {
    const PetscScalar *av = v + lda*icol[i]; /* start of source column icol[i] (column-major) */
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }
  ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr);

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
339 
340 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
341 {
342   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
343   PetscErrorCode ierr;
344 
345   PetscFunctionBegin;
346   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
347   PetscFunctionReturn(0);
348 }
349 
350 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
351 {
352   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
353   PetscErrorCode ierr;
354 
355   PetscFunctionBegin;
356   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
357   PetscFunctionReturn(0);
358 }
359 
360 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
361 {
362   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
363   PetscErrorCode ierr;
364 
365   PetscFunctionBegin;
366   ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr);
367   PetscFunctionReturn(0);
368 }
369 
/*
   Starts communicating the off-process values stashed by MatSetValues();
   the matching receives are processed in MatAssemblyEnd_MPIDense().
*/
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  /* nothing to send if stashing is disabled or the user promised no off-process entries */
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo(mdn->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
384 
/*
   Completes assembly: drains the stash messages started in
   MatAssemblyBegin_MPIDense() and inserts the received values, assembles the
   local block, and on the first final assembly builds the scatter used by
   MatMult().
*/
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break; /* no more incoming messages */

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* first final assembly: set up the communication objects used by MatMult() */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
423 
424 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
425 {
426   PetscErrorCode ierr;
427   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
428 
429   PetscFunctionBegin;
430   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
431   PetscFunctionReturn(0);
432 }
433 
/*
   Zeros the listed rows (global indices); only the locally-owned subset is
   acted on by each rank. If x and b are given, sets b[r] = diag*x[r] for each
   zeroed local row; if diag != 0, places diag on the diagonal afterwards via
   MatDiagonalSet().
*/
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows (lrows holds local indices) */
  ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr);
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr);
  }
  /* zero the rows in the local block with no diagonal fix-up (handled below) */
  ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (diag != 0.0) {
    Vec d;

    ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
    ierr = VecSet(d,diag);CHKERRQ(ierr);
    ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecDestroy(&d);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
466 
467 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
468 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
469 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
470 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
471 
/*
   y = A x: broadcast the needed entries of x into the local work vector lvec
   through the PetscSF, then apply the sequential mult of the local block.
*/
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  ierr = VecGetArrayReadAndMemType(xx,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr);
  /* memtype-aware SF call supports host or device arrays on either side */
  ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
490 
/*
   z = y + A x: broadcast x into the local work vector lvec through the
   PetscSF, then apply the sequential multadd of the local block.
*/
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  ierr = VecGetArrayReadAndMemType(xx,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr);
  /* memtype-aware SF call supports host or device arrays on either side */
  ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
509 
/*
   y = A^T x: apply the local sequential transpose-mult into the work vector
   lvec, then sum the partial results into y with an SF reduce.
*/
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  /* y accumulates contributions from all ranks, so start from zero */
  ierr = VecSet(yy,0.0);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(yy,&ay,&aymtype);CHKERRQ(ierr);
  /* memtype-aware SF call supports host or device arrays on either side */
  ierr = PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(yy,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
529 
/*
   z = y + A^T x: seed z with y, apply the local sequential transpose-mult
   into lvec, then sum the partial results into z with an SF reduce.
*/
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  /* z starts as y; the reduce below adds the A^T x contributions */
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(zz,&ay,&aymtype);CHKERRQ(ierr);
  /* memtype-aware SF call supports host or device arrays on either side */
  ierr = PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(zz,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
549 
550 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
551 {
552   Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
553   PetscErrorCode    ierr;
554   PetscInt          lda,len,i,n,m = A->rmap->n,radd;
555   PetscScalar       *x,zero = 0.0;
556   const PetscScalar *av;
557 
558   PetscFunctionBegin;
559   ierr = VecSet(v,zero);CHKERRQ(ierr);
560   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
561   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
562   PetscCheckFalse(n != A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
563   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
564   radd = A->rmap->rstart*m;
565   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
566   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
567   for (i=0; i<len; i++) {
568     x[i] = av[radd + i*lda + i];
569   }
570   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
571   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
572   PetscFunctionReturn(0);
573 }
574 
/*
   Destroys the parallel dense matrix: tears down the stash, the local block,
   the communication objects, and unregisters every composed method that was
   installed when the matrix was created.
*/
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  /* refuse to destroy while a column vector or submatrix view is still checked out */
  PetscCheckFalse(mdn->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheckFalse(mdn->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  /* release the local block, the scatter work vector/SF, and cached column views */
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);

  /* unregister every composed method implementation */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
#if defined (PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
634 
635 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
636 
637 #include <petscdraw.h>
638 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
639 {
640   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
641   PetscErrorCode    ierr;
642   PetscMPIInt       rank;
643   PetscViewerType   vtype;
644   PetscBool         iascii,isdraw;
645   PetscViewer       sviewer;
646   PetscViewerFormat format;
647 
648   PetscFunctionBegin;
649   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
650   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
651   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
652   if (iascii) {
653     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
654     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
655     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
656       MatInfo info;
657       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
658       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
659       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
660       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
661       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
662       ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
663       PetscFunctionReturn(0);
664     } else if (format == PETSC_VIEWER_ASCII_INFO) {
665       PetscFunctionReturn(0);
666     }
667   } else if (isdraw) {
668     PetscDraw draw;
669     PetscBool isnull;
670 
671     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
672     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
673     if (isnull) PetscFunctionReturn(0);
674   }
675 
676   {
677     /* assemble the entire matrix onto first processor. */
678     Mat         A;
679     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
680     PetscInt    *cols;
681     PetscScalar *vals;
682 
683     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
684     if (rank == 0) {
685       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
686     } else {
687       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
688     }
689     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
690     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
691     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
692     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
693 
694     /* Copy the matrix ... This isn't the most efficient means,
695        but it's quick for now */
696     A->insertmode = INSERT_VALUES;
697 
698     row = mat->rmap->rstart;
699     m   = mdn->A->rmap->n;
700     for (i=0; i<m; i++) {
701       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
702       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
703       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
704       row++;
705     }
706 
707     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
708     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
709     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
710     if (rank == 0) {
711       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
712       ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
713     }
714     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
715     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
716     ierr = MatDestroy(&A);CHKERRQ(ierr);
717   }
718   PetscFunctionReturn(0);
719 }
720 
721 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
722 {
723   PetscErrorCode ierr;
724   PetscBool      iascii,isbinary,isdraw,issocket;
725 
726   PetscFunctionBegin;
727   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
728   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
729   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
730   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
731 
732   if (iascii || issocket || isdraw) {
733     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
734   } else if (isbinary) {
735     ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
736   }
737   PetscFunctionReturn(0);
738 }
739 
740 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
741 {
742   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
743   Mat            mdn  = mat->A;
744   PetscErrorCode ierr;
745   PetscLogDouble isend[5],irecv[5];
746 
747   PetscFunctionBegin;
748   info->block_size = 1.0;
749 
750   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
751 
752   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
753   isend[3] = info->memory;  isend[4] = info->mallocs;
754   if (flag == MAT_LOCAL) {
755     info->nz_used      = isend[0];
756     info->nz_allocated = isend[1];
757     info->nz_unneeded  = isend[2];
758     info->memory       = isend[3];
759     info->mallocs      = isend[4];
760   } else if (flag == MAT_GLOBAL_MAX) {
761     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
762 
763     info->nz_used      = irecv[0];
764     info->nz_allocated = irecv[1];
765     info->nz_unneeded  = irecv[2];
766     info->memory       = irecv[3];
767     info->mallocs      = irecv[4];
768   } else if (flag == MAT_GLOBAL_SUM) {
769     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
770 
771     info->nz_used      = irecv[0];
772     info->nz_allocated = irecv[1];
773     info->nz_unneeded  = irecv[2];
774     info->memory       = irecv[3];
775     info->mallocs      = irecv[4];
776   }
777   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
778   info->fill_ratio_needed = 0;
779   info->factor_mallocs    = 0;
780   PetscFunctionReturn(0);
781 }
782 
783 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
784 {
785   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
786   PetscErrorCode ierr;
787 
788   PetscFunctionBegin;
789   switch (op) {
790   case MAT_NEW_NONZERO_LOCATIONS:
791   case MAT_NEW_NONZERO_LOCATION_ERR:
792   case MAT_NEW_NONZERO_ALLOCATION_ERR:
793     MatCheckPreallocated(A,1);
794     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
795     break;
796   case MAT_ROW_ORIENTED:
797     MatCheckPreallocated(A,1);
798     a->roworiented = flg;
799     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
800     break;
801   case MAT_FORCE_DIAGONAL_ENTRIES:
802   case MAT_KEEP_NONZERO_PATTERN:
803   case MAT_USE_HASH_TABLE:
804   case MAT_SORTED_FULL:
805     ierr = PetscInfo(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
806     break;
807   case MAT_IGNORE_OFF_PROC_ENTRIES:
808     a->donotstash = flg;
809     break;
810   case MAT_SYMMETRIC:
811   case MAT_STRUCTURALLY_SYMMETRIC:
812   case MAT_HERMITIAN:
813   case MAT_SYMMETRY_ETERNAL:
814   case MAT_IGNORE_LOWER_TRIANGULAR:
815   case MAT_IGNORE_ZERO_ENTRIES:
816     ierr = PetscInfo(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
817     break;
818   default:
819     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
820   }
821   PetscFunctionReturn(0);
822 }
823 
/*
  MatDiagonalScale_MPIDense - Scales the matrix as diag(ll) * A * diag(rr); either
  vector may be NULL to skip that side.

  ll conforms to the row layout of A, so its local entries scale the locally owned
  rows in place.  rr conforms to the column layout of A, but every process stores a
  full-width row block, so rr is first gathered into the sequential work vector
  mdn->lvec through the Mvctx star forest before the column scaling is applied.
*/
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  const PetscScalar *l;
  PetscScalar       x,*v,*vv,*r;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr); /* column stride of the (column-major) local array */
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    PetscCheckFalse(s2a != s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = vv + i; /* walk along row i: consecutive row entries are lda apart */
      for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
  }
  if (rr) {
    const PetscScalar *ar;

    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    PetscCheckFalse(s3a != s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
    /* gather the distributed rr into the sequential work vector lvec */
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = vv + i*lda; /* column i is contiguous in memory */
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
869 
/*
  MatNorm_MPIDense - Computes the requested norm of the parallel dense matrix.

  On one process the computation is delegated to the sequential block.  In parallel:
    NORM_FROBENIUS - local sum of |a_ij|^2, MPI_SUM reduction, then square root;
    NORM_1         - per-column absolute sums over the local block (each local block
                     stores all N global columns), MPI_SUM reduction, then the maximum;
    NORM_INFINITY  - max row norm of the local block, MPI_MAX reduction.
  The two norm is not supported.
*/
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,j;
  PetscMPIInt       size;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr);
  v    = av;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  if (size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* walk the whole local array; NOTE(review): assumes lda equals the local row count — confirm */
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = av;
      /* column-major traversal: accumulate |a_ij| per global column j */
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
      /* the 1-norm is the largest global column sum */
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
918 
/*
  MatTranspose_MPIDense - Forms the transpose of a parallel dense matrix.

  A fresh result matrix B (with swapped layouts) is created for MAT_INITIAL_MATRIX
  and MAT_INPLACE_MATRIX; for MAT_REUSE_MATRIX the caller-supplied *matout is filled.
  The values are moved with MatSetValues row by row: local column j of A's block
  spans all N global columns, so j doubles as the global row index of B.  For the
  in-place case the assembled B replaces A's contents via MatHeaderMerge.
*/
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i,lda;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else B = *matout;

  m    = a->A->rmap->n; n = a->A->cmap->n;
  ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  /* rwork holds the global row indices of A owned here; they become column indices of B */
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    /* column j of the local block (contiguous, length m) is row j of B */
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += lda;
  }
  ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: fold B into A, destroying the temporary header */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
956 
957 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
958 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
959 
960 PetscErrorCode MatSetUp_MPIDense(Mat A)
961 {
962   PetscErrorCode ierr;
963 
964   PetscFunctionBegin;
965   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
966   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
967   if (!A->preallocated) {
968     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
969   }
970   PetscFunctionReturn(0);
971 }
972 
973 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
974 {
975   PetscErrorCode ierr;
976   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
977 
978   PetscFunctionBegin;
979   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
980   PetscFunctionReturn(0);
981 }
982 
983 PetscErrorCode MatConjugate_MPIDense(Mat mat)
984 {
985   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
986   PetscErrorCode ierr;
987 
988   PetscFunctionBegin;
989   ierr = MatConjugate(a->A);CHKERRQ(ierr);
990   PetscFunctionReturn(0);
991 }
992 
993 PetscErrorCode MatRealPart_MPIDense(Mat A)
994 {
995   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
996   PetscErrorCode ierr;
997 
998   PetscFunctionBegin;
999   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1004 {
1005   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1006   PetscErrorCode ierr;
1007 
1008   PetscFunctionBegin;
1009   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1010   PetscFunctionReturn(0);
1011 }
1012 
1013 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
1014 {
1015   PetscErrorCode ierr;
1016   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1017 
1018   PetscFunctionBegin;
1019   PetscCheckFalse(!a->A,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
1020   PetscCheckFalse(!a->A->ops->getcolumnvector,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
1021   ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
1022   PetscFunctionReturn(0);
1023 }
1024 
1025 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat,PetscInt,PetscReal*);
1026 
/*
  MatGetColumnReductions_MPIDense - Computes a per-column reduction (norm, sum, or mean)
  of the parallel dense matrix into reductions[0..N-1].

  NOTE(review): `type` mixes NormType and ReductionType values in the same PetscInt;
  this relies on the two enums not sharing values for the cases tested here — confirm
  against petscmat.h.  MEAN reductions are computed as a SUM on the local block and
  divided by the global row count after the reduction.
*/
PetscErrorCode MatGetColumnReductions_MPIDense(Mat A,PetscInt type,PetscReal *reductions)
{
  PetscErrorCode ierr;
  PetscInt       i,m,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  if (type == REDUCTION_MEAN_REALPART) {
    /* mean = sum / m; ask the local kernel for the sum */
    ierr = MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_REALPART,work);CHKERRQ(ierr);
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    ierr = MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_IMAGINARYPART,work);CHKERRQ(ierr);
  } else {
    ierr = MatGetColumnReductions_SeqDense(a->A,type,work);CHKERRQ(ierr);
  }
  if (type == NORM_2) {
    /* local kernel returns per-column 2-norms; square so they sum correctly across ranks */
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRMPI(ierr);
  } else {
    ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRMPI(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    /* undo the squaring after the global sum */
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}
1060 
1061 #if defined(PETSC_HAVE_CUDA)
1062 static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1063 {
1064   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1065   PetscErrorCode ierr;
1066   PetscInt       lda;
1067 
1068   PetscFunctionBegin;
1069   PetscCheckFalse(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1070   PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1071   if (!a->cvec) {
1072     ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1073     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
1074   }
1075   a->vecinuse = col + 1;
1076   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1077   ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1078   ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1079   *v   = a->cvec;
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1084 {
1085   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1086   PetscErrorCode ierr;
1087 
1088   PetscFunctionBegin;
1089   PetscCheckFalse(!a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1090   PetscCheckFalse(!a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1091   a->vecinuse = 0;
1092   ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1093   ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
1094   *v   = NULL;
1095   PetscFunctionReturn(0);
1096 }
1097 
1098 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1099 {
1100   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1101   PetscInt       lda;
1102   PetscErrorCode ierr;
1103 
1104   PetscFunctionBegin;
1105   PetscCheckFalse(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1106   PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1107   if (!a->cvec) {
1108     ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1109     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
1110   }
1111   a->vecinuse = col + 1;
1112   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1113   ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1114   ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1115   ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
1116   *v   = a->cvec;
1117   PetscFunctionReturn(0);
1118 }
1119 
1120 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1121 {
1122   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   PetscCheckFalse(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1127   PetscCheckFalse(!a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1128   a->vecinuse = 0;
1129   ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1130   ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
1131   ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
1132   *v   = NULL;
1133   PetscFunctionReturn(0);
1134 }
1135 
1136 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1137 {
1138   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1139   PetscErrorCode ierr;
1140   PetscInt       lda;
1141 
1142   PetscFunctionBegin;
1143   PetscCheckFalse(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1144   PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1145   if (!a->cvec) {
1146     ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1147     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
1148   }
1149   a->vecinuse = col + 1;
1150   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1151   ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1152   ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1153   *v   = a->cvec;
1154   PetscFunctionReturn(0);
1155 }
1156 
1157 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1158 {
1159   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1160   PetscErrorCode ierr;
1161 
1162   PetscFunctionBegin;
1163   PetscCheckFalse(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1164   PetscCheckFalse(!a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1165   a->vecinuse = 0;
1166   ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1167   ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
1168   *v   = NULL;
1169   PetscFunctionReturn(0);
1170 }
1171 
1172 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1173 {
1174   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1175   PetscErrorCode ierr;
1176 
1177   PetscFunctionBegin;
1178   PetscCheckFalse(l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1179   PetscCheckFalse(l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1180   ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
1181   PetscFunctionReturn(0);
1182 }
1183 
1184 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1185 {
1186   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1187   PetscErrorCode ierr;
1188 
1189   PetscFunctionBegin;
1190   PetscCheckFalse(l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1191   PetscCheckFalse(l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1192   ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
1193   PetscFunctionReturn(0);
1194 }
1195 
1196 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1197 {
1198   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1199   PetscErrorCode ierr;
1200 
1201   PetscFunctionBegin;
1202   PetscCheckFalse(l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1203   PetscCheckFalse(l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1204   ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr);
1205   PetscFunctionReturn(0);
1206 }
1207 
1208 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1209 {
1210   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1211   PetscErrorCode ierr;
1212 
1213   PetscFunctionBegin;
1214   ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1219 {
1220   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1221   PetscErrorCode ierr;
1222 
1223   PetscFunctionBegin;
1224   ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1229 {
1230   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1231   PetscErrorCode ierr;
1232 
1233   PetscFunctionBegin;
1234   ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1239 {
1240   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1241   PetscErrorCode ierr;
1242 
1243   PetscFunctionBegin;
1244   ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
1245   PetscFunctionReturn(0);
1246 }
1247 
1248 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1249 {
1250   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1251   PetscErrorCode ierr;
1252 
1253   PetscFunctionBegin;
1254   ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1259 {
1260   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1261   PetscErrorCode ierr;
1262 
1263   PetscFunctionBegin;
1264   ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1269 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1270 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
1271 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1272 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1273 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
1274 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);
1275 
/*
  MatBindToCPU_MPIDenseCUDA - Pins the matrix to CPU (bind=PETSC_TRUE) or releases it
  back to the GPU (bind=PETSC_FALSE).

  The binding is propagated to the local block and the cached compatible submatrix,
  and the MatDenseGet/RestoreColumnVec{,Read,Write} method table is swapped between
  the CUDA and plain MPIDense implementations.  When unbinding, cached helper objects
  of the wrong (non-CUDA) type are destroyed so they get recreated on demand.
*/
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscCheckFalse(d->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheckFalse(d->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* discard cached helpers that are not CUDA-typed; they are rebuilt lazily */
    ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
    }
    ierr = PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = MatDestroy(&d->cmat);CHKERRQ(ierr);
    }
    /* install the CUDA column-vector accessors */
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
  } else {
    /* install the host (plain MPIDense) column-vector accessors */
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  }
  if (d->cmat) {
    ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1318 
/*
  MatMPIDenseCUDASetPreallocation - Allocates (or attaches) device storage for a
  MATMPIDENSECUDA matrix.

  d_data may be NULL, in which case PETSc allocates the device array; otherwise the
  caller-supplied device pointer is used.  Silently returns for non-CUDA matrices.
  The local block holds the owned rows across all N global columns
  (rmap->n x cmap->N) as a MATSEQDENSECUDA matrix.
*/
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  if (!iscuda) PetscFunctionReturn(0);
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!d->A) {
    /* lazily create the sequential local block */
    ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
    ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
  }
  ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
  ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1341 #endif
1342 
1343 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1344 {
1345   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1346   PetscErrorCode ierr;
1347 
1348   PetscFunctionBegin;
1349   ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
1350   PetscFunctionReturn(0);
1351 }
1352 
/*
  MatMissingDiagonal_MPIDense - A dense matrix stores every entry explicitly, so a
  diagonal entry can never be missing; *missing is always PETSC_FALSE and the
  location output d is left untouched.
*/
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1359 
1360 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1361 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1362 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1363 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1364 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
1365 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);
1366 
1367 /* -------------------------------------------------------------------*/
1368 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1369                                         MatGetRow_MPIDense,
1370                                         MatRestoreRow_MPIDense,
1371                                         MatMult_MPIDense,
1372                                 /*  4*/ MatMultAdd_MPIDense,
1373                                         MatMultTranspose_MPIDense,
1374                                         MatMultTransposeAdd_MPIDense,
1375                                         NULL,
1376                                         NULL,
1377                                         NULL,
1378                                 /* 10*/ NULL,
1379                                         NULL,
1380                                         NULL,
1381                                         NULL,
1382                                         MatTranspose_MPIDense,
1383                                 /* 15*/ MatGetInfo_MPIDense,
1384                                         MatEqual_MPIDense,
1385                                         MatGetDiagonal_MPIDense,
1386                                         MatDiagonalScale_MPIDense,
1387                                         MatNorm_MPIDense,
1388                                 /* 20*/ MatAssemblyBegin_MPIDense,
1389                                         MatAssemblyEnd_MPIDense,
1390                                         MatSetOption_MPIDense,
1391                                         MatZeroEntries_MPIDense,
1392                                 /* 24*/ MatZeroRows_MPIDense,
1393                                         NULL,
1394                                         NULL,
1395                                         NULL,
1396                                         NULL,
1397                                 /* 29*/ MatSetUp_MPIDense,
1398                                         NULL,
1399                                         NULL,
1400                                         MatGetDiagonalBlock_MPIDense,
1401                                         NULL,
1402                                 /* 34*/ MatDuplicate_MPIDense,
1403                                         NULL,
1404                                         NULL,
1405                                         NULL,
1406                                         NULL,
1407                                 /* 39*/ MatAXPY_MPIDense,
1408                                         MatCreateSubMatrices_MPIDense,
1409                                         NULL,
1410                                         MatGetValues_MPIDense,
1411                                         MatCopy_MPIDense,
1412                                 /* 44*/ NULL,
1413                                         MatScale_MPIDense,
1414                                         MatShift_Basic,
1415                                         NULL,
1416                                         NULL,
1417                                 /* 49*/ MatSetRandom_MPIDense,
1418                                         NULL,
1419                                         NULL,
1420                                         NULL,
1421                                         NULL,
1422                                 /* 54*/ NULL,
1423                                         NULL,
1424                                         NULL,
1425                                         NULL,
1426                                         NULL,
1427                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1428                                         MatDestroy_MPIDense,
1429                                         MatView_MPIDense,
1430                                         NULL,
1431                                         NULL,
1432                                 /* 64*/ NULL,
1433                                         NULL,
1434                                         NULL,
1435                                         NULL,
1436                                         NULL,
1437                                 /* 69*/ NULL,
1438                                         NULL,
1439                                         NULL,
1440                                         NULL,
1441                                         NULL,
1442                                 /* 74*/ NULL,
1443                                         NULL,
1444                                         NULL,
1445                                         NULL,
1446                                         NULL,
1447                                 /* 79*/ NULL,
1448                                         NULL,
1449                                         NULL,
1450                                         NULL,
1451                                 /* 83*/ MatLoad_MPIDense,
1452                                         NULL,
1453                                         NULL,
1454                                         NULL,
1455                                         NULL,
1456                                         NULL,
1457                                 /* 89*/ NULL,
1458                                         NULL,
1459                                         NULL,
1460                                         NULL,
1461                                         NULL,
1462                                 /* 94*/ NULL,
1463                                         NULL,
1464                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1465                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1466                                         NULL,
1467                                 /* 99*/ MatProductSetFromOptions_MPIDense,
1468                                         NULL,
1469                                         NULL,
1470                                         MatConjugate_MPIDense,
1471                                         NULL,
1472                                 /*104*/ NULL,
1473                                         MatRealPart_MPIDense,
1474                                         MatImaginaryPart_MPIDense,
1475                                         NULL,
1476                                         NULL,
1477                                 /*109*/ NULL,
1478                                         NULL,
1479                                         NULL,
1480                                         MatGetColumnVector_MPIDense,
1481                                         MatMissingDiagonal_MPIDense,
1482                                 /*114*/ NULL,
1483                                         NULL,
1484                                         NULL,
1485                                         NULL,
1486                                         NULL,
1487                                 /*119*/ NULL,
1488                                         NULL,
1489                                         NULL,
1490                                         NULL,
1491                                         NULL,
1492                                 /*124*/ NULL,
1493                                         MatGetColumnReductions_MPIDense,
1494                                         NULL,
1495                                         NULL,
1496                                         NULL,
1497                                 /*129*/ NULL,
1498                                         NULL,
1499                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1500                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1501                                         NULL,
1502                                 /*134*/ NULL,
1503                                         NULL,
1504                                         NULL,
1505                                         NULL,
1506                                         NULL,
1507                                 /*139*/ NULL,
1508                                         NULL,
1509                                         NULL,
1510                                         NULL,
1511                                         NULL,
1512                                         MatCreateMPIMatConcatenateSeqMat_MPIDense,
1513                                 /*145*/ NULL,
1514                                         NULL,
1515                                         NULL
1516 };
1517 
PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Allocate (or adopt) the storage of the local dense block a->A.
     If data is NULL, PETSc allocates the array itself; otherwise the
     caller-supplied array is handed to MatSeqDenseSetPreallocation(). */
  PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  if (!a->A) {
    /* lazily create the sequential inner matrix: each process stores its
       mat->rmap->n local rows of all mat->cmap->N global columns */
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
    ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  }
  /* the inner type mirrors the outer one: SEQDENSECUDA inside MPIDENSECUDA */
  ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  mat->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1539 
1540 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1541 {
1542   Mat            B,C;
1543   PetscErrorCode ierr;
1544 
1545   PetscFunctionBegin;
1546   ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
1547   ierr = MatConvert_SeqAIJ_SeqDense(C,MATSEQDENSE,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1548   ierr = MatDestroy(&C);CHKERRQ(ierr);
1549   if (reuse == MAT_REUSE_MATRIX) {
1550     C = *newmat;
1551   } else C = NULL;
1552   ierr = MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
1553   ierr = MatDestroy(&B);CHKERRQ(ierr);
1554   if (reuse == MAT_INPLACE_MATRIX) {
1555     ierr = MatHeaderReplace(A,&C);CHKERRQ(ierr);
1556   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1557   PetscFunctionReturn(0);
1558 }
1559 
1560 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1561 {
1562   Mat            B,C;
1563   PetscErrorCode ierr;
1564 
1565   PetscFunctionBegin;
1566   ierr = MatDenseGetLocalMatrix(A,&C);CHKERRQ(ierr);
1567   ierr = MatConvert_SeqDense_SeqAIJ(C,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1568   if (reuse == MAT_REUSE_MATRIX) {
1569     C = *newmat;
1570   } else C = NULL;
1571   ierr = MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
1572   ierr = MatDestroy(&B);CHKERRQ(ierr);
1573   if (reuse == MAT_INPLACE_MATRIX) {
1574     ierr = MatHeaderReplace(A,&C);CHKERRQ(ierr);
1575   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1576   PetscFunctionReturn(0);
1577 }
1578 
1579 #if defined(PETSC_HAVE_ELEMENTAL)
1580 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1581 {
1582   Mat            mat_elemental;
1583   PetscErrorCode ierr;
1584   PetscScalar    *v;
1585   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1586 
1587   PetscFunctionBegin;
1588   if (reuse == MAT_REUSE_MATRIX) {
1589     mat_elemental = *newmat;
1590     ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
1591   } else {
1592     ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
1593     ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1594     ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
1595     ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
1596     ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1597   }
1598 
1599   ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
1600   for (i=0; i<N; i++) cols[i] = i;
1601   for (i=0; i<m; i++) rows[i] = rstart + i;
1602 
1603   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1604   ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
1605   ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
1606   ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1607   ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1608   ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
1609   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1610 
1611   if (reuse == MAT_INPLACE_MATRIX) {
1612     ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
1613   } else {
1614     *newmat = mat_elemental;
1615   }
1616   PetscFunctionReturn(0);
1617 }
1618 #endif
1619 
1620 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1621 {
1622   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1623   PetscErrorCode ierr;
1624 
1625   PetscFunctionBegin;
1626   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1627   PetscFunctionReturn(0);
1628 }
1629 
1630 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1631 {
1632   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1633   PetscErrorCode ierr;
1634 
1635   PetscFunctionBegin;
1636   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1637   PetscFunctionReturn(0);
1638 }
1639 
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *mat;
  PetscInt       m,nloc,N;

  PetscFunctionBegin;
  /* Concatenate the per-process sequential dense matrices inmat (stacked by rows)
     into one parallel dense matrix; n is the local column count of the result's
     column layout, or PETSC_DECIDE. Collective on comm. */
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  /* NOTE(review): nloc is fetched but not used below */
  ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    PetscCheckFalse(sum != N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);

    ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
    /* all values are set locally below, so skip the communication machinery */
    ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  }

  /* numeric phase: copy the local rows straight into the inner sequential block */
  mat = (Mat_MPIDense*)(*outmat)->data;
  ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1670 
1671 #if defined(PETSC_HAVE_CUDA)
/* Convert a MATMPIDENSECUDA matrix back to plain MATMPIDENSE: copy/duplicate as
   requested, move data to the host, strip the CUDA-only composed methods, and
   restore the standard vector type. */
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* MAT_INPLACE_MATRIX: *newmat is M itself and is converted in place */

  B    = *newmat;
  /* bring data and function pointers back to their host versions */
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
  /* remove the CUDA-specific composed methods installed by the CUDA conversion */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* convert the inner sequential block to its host type as well */
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}
1713 
/* Convert a plain MATMPIDENSE matrix to MATMPIDENSECUDA: copy/duplicate as
   requested, install the CUDA-specific composed methods, switch the default
   vector type to CUDA, and convert the inner sequential block to SEQDENSECUDA. */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* MAT_INPLACE_MATRIX: *newmat is M itself and is converted in place */

  B    = *newmat;
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  /* install CUDA-specific implementations; MatConvert_MPIDenseCUDA_MPIDense removes them again */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B->data);
  if (m->A) {
    ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    /* not yet preallocated: nothing resides on either host or device */
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
1758 #endif
1759 
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  /* Expose column col of A as a parallel Vec without copying: the returned
     vector borrows the column's storage inside the local dense block.
     Only one column vector may be outstanding at a time (tracked by a->vecinuse). */
  PetscCheckFalse(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the shared work vector with no storage; an array is placed below */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* +1 so that column 0 still registers as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  /* column col starts col*lda scalars into the local array (lda = leading dimension) */
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1779 
1780 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1781 {
1782   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1783   PetscErrorCode ierr;
1784 
1785   PetscFunctionBegin;
1786   PetscCheckFalse(!a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1787   PetscCheckFalse(!a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1788   a->vecinuse = 0;
1789   ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1790   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1791   *v   = NULL;
1792   PetscFunctionReturn(0);
1793 }
1794 
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  /* Read-only variant of MatDenseGetColumnVec_MPIDense(): same borrowed-storage
     scheme, but the array is fetched read-only and the vector is read-locked. */
  PetscCheckFalse(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the shared work vector with no storage; an array is placed below */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* +1 so that column 0 still registers as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  /* forbid writes through the returned vector until it is restored */
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1815 
1816 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1817 {
1818   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1819   PetscErrorCode ierr;
1820 
1821   PetscFunctionBegin;
1822   PetscCheckFalse(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1823   PetscCheckFalse(!a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1824   a->vecinuse = 0;
1825   ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1826   ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
1827   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1828   *v   = NULL;
1829   PetscFunctionReturn(0);
1830 }
1831 
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  /* Write-only variant of MatDenseGetColumnVec_MPIDense(): the array is fetched
     with write intent (previous contents of the column need not be valid). */
  PetscCheckFalse(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the shared work vector with no storage; an array is placed below */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* +1 so that column 0 still registers as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1851 
1852 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1853 {
1854   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1855   PetscErrorCode ierr;
1856 
1857   PetscFunctionBegin;
1858   PetscCheckFalse(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1859   PetscCheckFalse(!a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1860   a->vecinuse = 0;
1861   ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1862   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1863   *v   = NULL;
1864   PetscFunctionReturn(0);
1865 }
1866 
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscBool      setup = PETSC_FALSE;

  PetscFunctionBegin;
  /* Expose columns [cbegin,cend) of A as a matrix that shares A's storage.
     The window matrix a->cmat is cached and reused across calls; only one
     such window may be outstanding at a time (tracked by a->matinuse). */
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  PetscCheckFalse(a->vecinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheckFalse(a->matinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cmat) {
    /* first call: build the shell with A's row layout and cend-cbegin columns */
    setup = PETSC_TRUE;
    ierr = MatCreate(comm,&a->cmat);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr);
    ierr = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  } else if (cend-cbegin != a->cmat->cmap->N) {
    /* cached window has the wrong width: rebuild only the column layout */
    setup = PETSC_TRUE;
    ierr = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  }
  c = (Mat_MPIDense*)a->cmat->data;
  /* c->A is non-NULL only while a previous window is still checked out */
  PetscCheckFalse(c->A,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr);
  if (setup) { /* do we really need this? */
    ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr);
  }
  /* mark the window usable without going through preallocation/assembly */
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled = PETSC_TRUE;
  a->matinuse = cbegin + 1;  /* +1 so that cbegin==0 still registers as "in use" */
  *v = a->cmat;
  PetscFunctionReturn(0);
}
1906 
1907 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
1908 {
1909   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1910   Mat_MPIDense   *c;
1911   PetscErrorCode ierr;
1912 
1913   PetscFunctionBegin;
1914   PetscCheckFalse(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
1915   PetscCheckFalse(!a->cmat,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
1916   PetscCheckFalse(*v != a->cmat,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
1917   a->matinuse = 0;
1918   c    = (Mat_MPIDense*)a->cmat->data;
1919   ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr);
1920   *v   = NULL;
1921   PetscFunctionReturn(0);
1922 }
1923 
1924 /*MC
1925    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1926 
1927    Options Database Keys:
1928 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1929 
1930   Level: beginner
1931 
1932 .seealso: MatCreateDense()
1933 
1934 M*/
/* Constructor for MATMPIDENSE: installs the operations table, creates the value
   stash for off-process MatSetValues(), and composes the type-specific methods
   (dense array/column/submatrix access, conversions, and MatProduct hooks). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* dense-specific access methods (resolved by name via PetscUseMethod/PetscTryMethod) */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr);
  /* type conversions */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",MatConvert_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  /* MatProduct hooks for mixed AIJ/dense products */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#endif

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2000 
2001 /*MC
2002    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
2003 
2004    Options Database Keys:
2005 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
2006 
2007   Level: beginner
2008 
.seealso: MATMPIDENSE, MATSEQDENSECUDA, MATDENSECUDA
2010 
2011 M*/
2012 #if defined(PETSC_HAVE_CUDA)
2013 #include <petsc/private/deviceimpl.h>
/* Constructor for MATMPIDENSECUDA: ensure the CUDA device is initialized, build a
   plain MPIDENSE matrix, then convert it to the CUDA type in place. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscDeviceInitialize(PETSC_DEVICE_CUDA);CHKERRQ(ierr);
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2024 #endif
2025 
2026 /*MC
2027    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
2028 
2029    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
2030    and MATMPIDENSE otherwise.
2031 
2032    Options Database Keys:
2033 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
2034 
2035   Level: beginner
2036 
2037 .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
2038 M*/
2039 
2040 /*MC
2041    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
2042 
2043    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
2044    and MATMPIDENSECUDA otherwise.
2045 
2046    Options Database Keys:
2047 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
2048 
2049   Level: beginner
2050 
2051 .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
2052 M*/
2053 
2054 /*@C
2055    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
2056 
2057    Collective
2058 
2059    Input Parameters:
2060 .  B - the matrix
2061 -  data - optional location of matrix data.  Set data=NULL for PETSc
2062    to control all matrix memory allocation.
2063 
2064    Notes:
2065    The dense format is fully compatible with standard Fortran 77
2066    storage by columns.
2067 
2068    The data input variable is intended primarily for Fortran programmers
2069    who wish to allocate their own matrix memory space.  Most users should
2070    set data=NULL.
2071 
2072    Level: intermediate
2073 
2074 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
2075 @*/
2076 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
2077 {
2078   PetscErrorCode ierr;
2079 
2080   PetscFunctionBegin;
2081   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2082   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
2083   PetscFunctionReturn(0);
2084 }
2085 
2086 /*@
2087    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
2088    array provided by the user. This is useful to avoid copying an array
2089    into a matrix
2090 
2091    Not Collective
2092 
2093    Input Parameters:
2094 +  mat - the matrix
2095 -  array - the array in column major order
2096 
2097    Notes:
2098    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
2099    freed when the matrix is destroyed.
2100 
2101    Level: developer
2102 
2103 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
2104 
2105 @*/
2106 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
2107 {
2108   PetscErrorCode ierr;
2109 
2110   PetscFunctionBegin;
2111   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2112   ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2113   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2114 #if defined(PETSC_HAVE_CUDA)
2115   mat->offloadmask = PETSC_OFFLOAD_CPU;
2116 #endif
2117   PetscFunctionReturn(0);
2118 }
2119 
2120 /*@
2121    MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()
2122 
2123    Not Collective
2124 
2125    Input Parameters:
2126 .  mat - the matrix
2127 
2128    Notes:
2129    You can only call this after a call to MatDensePlaceArray()
2130 
2131    Level: developer
2132 
2133 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
2134 
2135 @*/
2136 PetscErrorCode  MatDenseResetArray(Mat mat)
2137 {
2138   PetscErrorCode ierr;
2139 
2140   PetscFunctionBegin;
2141   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2142   ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
2143   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2144   PetscFunctionReturn(0);
2145 }
2146 
2147 /*@
2148    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2149    array provided by the user. This is useful to avoid copying an array
2150    into a matrix
2151 
2152    Not Collective
2153 
2154    Input Parameters:
2155 +  mat - the matrix
2156 -  array - the array in column major order
2157 
2158    Notes:
2159    The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
2160    freed by the user. It will be freed when the matrix is destroyed.
2161 
2162    Level: developer
2163 
2164 .seealso: MatDenseGetArray(), VecReplaceArray()
2165 @*/
2166 PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
2167 {
2168   PetscErrorCode ierr;
2169 
2170   PetscFunctionBegin;
2171   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2172   ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2173   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2174 #if defined(PETSC_HAVE_CUDA)
2175   mat->offloadmask = PETSC_OFFLOAD_CPU;
2176 #endif
2177   PetscFunctionReturn(0);
2178 }
2179 
2180 #if defined(PETSC_HAVE_CUDA)
2181 /*@C
2182    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
2183    array provided by the user. This is useful to avoid copying an array
2184    into a matrix
2185 
2186    Not Collective
2187 
2188    Input Parameters:
2189 +  mat - the matrix
2190 -  array - the array in column major order
2191 
2192    Notes:
2193    You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
2194    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
2195 
2196    Level: developer
2197 
2198 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
2199 @*/
2200 PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
2201 {
2202   PetscErrorCode ierr;
2203 
2204   PetscFunctionBegin;
2205   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2206   ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2207   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2208   mat->offloadmask = PETSC_OFFLOAD_GPU;
2209   PetscFunctionReturn(0);
2210 }
2211 
2212 /*@C
2213    MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray()
2214 
2215    Not Collective
2216 
2217    Input Parameters:
2218 .  mat - the matrix
2219 
2220    Notes:
2221    You can only call this after a call to MatDenseCUDAPlaceArray()
2222 
2223    Level: developer
2224 
2225 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()
2226 
2227 @*/
2228 PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
2229 {
2230   PetscErrorCode ierr;
2231 
2232   PetscFunctionBegin;
2233   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2234   ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
2235   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2236   PetscFunctionReturn(0);
2237 }
2238 
2239 /*@C
2240    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
2241    array provided by the user. This is useful to avoid copying an array
2242    into a matrix
2243 
2244    Not Collective
2245 
2246    Input Parameters:
2247 +  mat - the matrix
2248 -  array - the array in column major order
2249 
2250    Notes:
2251    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2252    The memory passed in CANNOT be freed by the user. It will be freed
2253    when the matrix is destroyed. The array should respect the matrix leading dimension.
2254 
2255    Level: developer
2256 
2257 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
2258 @*/
2259 PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
2260 {
2261   PetscErrorCode ierr;
2262 
2263   PetscFunctionBegin;
2264   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2265   ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2266   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2267   mat->offloadmask = PETSC_OFFLOAD_GPU;
2268   PetscFunctionReturn(0);
2269 }
2270 
2271 /*@C
2272    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.
2273 
2274    Not Collective
2275 
2276    Input Parameters:
2277 .  A - the matrix
2278 
2279    Output Parameters
2280 .  array - the GPU array in column major order
2281 
2282    Notes:
2283    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
2284 
2285    Level: developer
2286 
2287 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
2288 @*/
2289 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
2290 {
2291   PetscErrorCode ierr;
2292 
2293   PetscFunctionBegin;
2294   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2295   ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2296   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2297   PetscFunctionReturn(0);
2298 }
2299 
2300 /*@C
2301    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().
2302 
2303    Not Collective
2304 
2305    Input Parameters:
2306 +  A - the matrix
2307 -  array - the GPU array in column major order
2308 
2309    Notes:
2310 
2311    Level: developer
2312 
2313 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2314 @*/
2315 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
2316 {
2317   PetscErrorCode ierr;
2318 
2319   PetscFunctionBegin;
2320   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2321   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2322   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2323   A->offloadmask = PETSC_OFFLOAD_GPU;
2324   PetscFunctionReturn(0);
2325 }
2326 
2327 /*@C
2328    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.
2329 
2330    Not Collective
2331 
2332    Input Parameters:
2333 .  A - the matrix
2334 
2335    Output Parameters
2336 .  array - the GPU array in column major order
2337 
2338    Notes:
2339    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2340 
2341    Level: developer
2342 
2343 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2344 @*/
2345 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
2346 {
2347   PetscErrorCode ierr;
2348 
2349   PetscFunctionBegin;
2350   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2351   ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2352   PetscFunctionReturn(0);
2353 }
2354 
2355 /*@C
2356    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2357 
2358    Not Collective
2359 
2360    Input Parameters:
2361 +  A - the matrix
2362 -  array - the GPU array in column major order
2363 
2364    Notes:
2365    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2366 
2367    Level: developer
2368 
2369 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
2370 @*/
2371 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2372 {
2373   PetscErrorCode ierr;
2374 
2375   PetscFunctionBegin;
2376   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2377   PetscFunctionReturn(0);
2378 }
2379 
2380 /*@C
2381    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.
2382 
2383    Not Collective
2384 
2385    Input Parameters:
2386 .  A - the matrix
2387 
2388    Output Parameters
2389 .  array - the GPU array in column major order
2390 
2391    Notes:
2392    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().
2393 
2394    Level: developer
2395 
2396 .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2397 @*/
2398 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
2399 {
2400   PetscErrorCode ierr;
2401 
2402   PetscFunctionBegin;
2403   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2404   ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2405   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2406   PetscFunctionReturn(0);
2407 }
2408 
2409 /*@C
2410    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().
2411 
2412    Not Collective
2413 
2414    Input Parameters:
2415 +  A - the matrix
2416 -  array - the GPU array in column major order
2417 
2418    Notes:
2419 
2420    Level: developer
2421 
2422 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2423 @*/
2424 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
2425 {
2426   PetscErrorCode ierr;
2427 
2428   PetscFunctionBegin;
2429   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2430   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2431   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2432   A->offloadmask = PETSC_OFFLOAD_GPU;
2433   PetscFunctionReturn(0);
2434 }
2435 #endif
2436 
2437 /*@C
2438    MatCreateDense - Creates a matrix in dense format.
2439 
2440    Collective
2441 
2442    Input Parameters:
2443 +  comm - MPI communicator
2444 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2445 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2446 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2447 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2448 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2449    to control all matrix memory allocation.
2450 
2451    Output Parameter:
2452 .  A - the matrix
2453 
2454    Notes:
2455    The dense format is fully compatible with standard Fortran 77
2456    storage by columns.
2457    Note that, although local portions of the matrix are stored in column-major
2458    order, the matrix is partitioned across MPI ranks by row.
2459 
2460    The data input variable is intended primarily for Fortran programmers
2461    who wish to allocate their own matrix memory space.  Most users should
2462    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
2463 
2464    The user MUST specify either the local or global matrix dimensions
2465    (possibly both).
2466 
2467    Level: intermediate
2468 
2469 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
2470 @*/
2471 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2472 {
2473   PetscErrorCode ierr;
2474   PetscMPIInt    size;
2475 
2476   PetscFunctionBegin;
2477   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2478   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2479   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
2480   if (size > 1) {
2481     PetscBool havedata = (PetscBool)!!data;
2482 
2483     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
2484     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
2485     ierr = MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm);CHKERRMPI(ierr);
2486     if (havedata) {  /* user provided data array, so no need to assemble */
2487       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
2488       (*A)->assembled = PETSC_TRUE;
2489     }
2490   } else {
2491     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
2492     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
2493   }
2494   PetscFunctionReturn(0);
2495 }
2496 
#if defined(PETSC_HAVE_CUDA)
/*@C
   MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of GPU matrix data.  Pass data=NULL to have PETSc
   handle matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   Whether data is NULL must agree on all ranks of comm.

   Level: intermediate

.seealso: MatCreate(), MatCreateDense()
@*/
PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode err;
  PetscMPIInt    nproc;

  PetscFunctionBegin;
  err = MatCreate(comm,A);CHKERRQ(err);
  /* unlike MatCreateDense(), data must be collectively NULL or collectively non-NULL */
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  err = MatSetSizes(*A,m,n,M,N);CHKERRQ(err);
  err = MPI_Comm_size(comm,&nproc);CHKERRMPI(err);
  if (nproc > 1) {
    err = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(err);
    err = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(err);
    if (data) { /* user provided data array, so no need to assemble */
      err = MatSetUpMultiply_MPIDense(*A);CHKERRQ(err);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    err = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(err);
    err = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(err);
  }
  PetscFunctionReturn(0);
}
#endif
2545 
2546 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
2547 {
2548   Mat            mat;
2549   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
2550   PetscErrorCode ierr;
2551 
2552   PetscFunctionBegin;
2553   *newmat = NULL;
2554   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
2555   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2556   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
2557   a       = (Mat_MPIDense*)mat->data;
2558 
2559   mat->factortype   = A->factortype;
2560   mat->assembled    = PETSC_TRUE;
2561   mat->preallocated = PETSC_TRUE;
2562 
2563   mat->insertmode = NOT_SET_VALUES;
2564   a->donotstash   = oldmat->donotstash;
2565 
2566   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
2567   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
2568 
2569   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2570   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2571   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
2572 
2573   *newmat = mat;
2574   PetscFunctionReturn(0);
2575 }
2576 
2577 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
2578 {
2579   PetscErrorCode ierr;
2580   PetscBool      isbinary;
2581 #if defined(PETSC_HAVE_HDF5)
2582   PetscBool      ishdf5;
2583 #endif
2584 
2585   PetscFunctionBegin;
2586   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2587   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2588   /* force binary viewer to load .info file if it has not yet done so */
2589   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2590   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2591 #if defined(PETSC_HAVE_HDF5)
2592   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2593 #endif
2594   if (isbinary) {
2595     ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
2596 #if defined(PETSC_HAVE_HDF5)
2597   } else if (ishdf5) {
2598     ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
2599 #endif
2600   } else SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2601   PetscFunctionReturn(0);
2602 }
2603 
2604 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
2605 {
2606   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2607   Mat            a,b;
2608   PetscBool      flg;
2609   PetscErrorCode ierr;
2610 
2611   PetscFunctionBegin;
2612   a    = matA->A;
2613   b    = matB->A;
2614   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2615   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2616   PetscFunctionReturn(0);
2617 }
2618 
2619 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2620 {
2621   PetscErrorCode        ierr;
2622   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2623 
2624   PetscFunctionBegin;
2625   ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
2626   ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
2627   ierr = PetscFree(atb);CHKERRQ(ierr);
2628   PetscFunctionReturn(0);
2629 }
2630 
2631 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2632 {
2633   PetscErrorCode        ierr;
2634   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2635 
2636   PetscFunctionBegin;
2637   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
2638   ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
2639   ierr = PetscFree(abt);CHKERRQ(ierr);
2640   PetscFunctionReturn(0);
2641 }
2642 
/* Numeric phase of C = A^T * B for parallel dense A and B: every rank
   computes the full-size local contribution aseq^T * bseq, then the row
   blocks are summed onto their owning ranks with MPI_Reduce_scatter. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts;
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheckFalse(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  /* sendbuf and recvcounts were allocated by the symbolic phase */
  atb = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf = atb->sendbuf;

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);

  /* arrange atbarray into sendbuf: for each destination rank pack its row
     block column by column (atbarray is column major with leading dimension cM) */
  ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);

  /* sum all atbarray to local values of C */
  /* NOTE(review): writing the reduce-scatter result straight into carray
     assumes c->A has no leading-dimension padding — TODO confirm */
  ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRMPI(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2687 
/* Symbolic phase of C = A^T * B for parallel dense A and B: checks that the
   row layouts of A and B match, sets C's sizes/type, and allocates the pack
   buffer and reduce-scatter counts reused by the numeric phase. */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool             cisdense;
  PetscInt              i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheckFalse(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* A^T * B requires A and B to share the same row distribution */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product C */
  ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
  if (!cisdense) {
    ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
  }
  ierr = MatSetUp(C);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = C->rmap->N;
  /* sendbuf holds the full global-size product on each rank; the size_t
     casts guard against PetscInt overflow in cM*cN */
  ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  /* each rank receives (its owned rows) x (all columns) entries */
  for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2727 
/* Symbolic phase of C = A * B^T for parallel dense A and B: sizes C and
   allocates the communication buffers for one of two numeric algorithms
   ("allgatherv" gathers all of B on each rank, "cyclic" rotates row blocks
   of B around the ranks). */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  PetscMPIInt           tag;
  PetscInt              alg;                  /* 0: allgatherv, 1: cyclic */
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  PetscCheckFalse(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A and B */
  PetscCheckFalse(A->cmap->n != B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")",A->cmap->n,B->cmap->n);

  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  alg  = flg ? 0 : 1;

  /* setup matrix product C */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  /* private tag so the cyclic sends/receives cannot collide with other traffic on C's communicator */
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    /* double buffer sized for the largest row block of B that will circulate */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" */
    /* buf[0] holds the packed local part of B, buf[1] the gathered global B */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data    = abt;
  C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2780 
/* Numeric phase of C = A * B^T using the "cyclic" algorithm: each rank's row
   block of B circulates around the ranks in a ring; at every step the local
   block of A is multiplied (via BLAS gemm) against the block currently held,
   filling the corresponding columns of C. Communication of the next block is
   overlapped with the local gemm. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=NULL, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda = 0, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheckFalse(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  = (Mat_MatTransMultDense*)C->product->data;
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
  /* gather the leading dimensions of the three local blocks for BLAS */
  ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  /* pack the local block of B into a contiguous column-major buffer unless
     it already has no leading-dimension padding */
  if (blda == bn) {
    sendbuf = (PetscScalar*)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  /* ring topology: send to the previous rank, receive from the next */
  if (size > 1) {
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRMPI(ierr);
      ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRMPI(ierr);
    }

    /* local aseq * sendbuf^T */
    /* sendbuf is a contiguous (cn x cK) column-major block, hence leading dimension cn */
    ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2866 
2867 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2868 {
2869   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2870   Mat_MatTransMultDense *abt;
2871   PetscErrorCode        ierr;
2872   MPI_Comm              comm;
2873   PetscMPIInt           size;
2874   PetscScalar           *cv, *sendbuf, *recvbuf;
2875   const PetscScalar     *av,*bv;
2876   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2877   PetscScalar           _DOne=1.0,_DZero=0.0;
2878   PetscBLASInt          cm, cn, ck, alda, clda;
2879 
2880   PetscFunctionBegin;
2881   MatCheckProduct(C,3);
2882   PetscCheckFalse(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2883   abt  = (Mat_MatTransMultDense*)C->product->data;
2884   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2885   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
2886   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
2887   ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
2888   ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
2889   ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
2890   ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
2891   ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr);
2892   ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
2893   ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
2894   /* copy transpose of B into buf[0] */
2895   bn      = B->rmap->n;
2896   sendbuf = abt->buf[0];
2897   recvbuf = abt->buf[1];
2898   for (k = 0, j = 0; j < bn; j++) {
2899     for (i = 0; i < cK; i++, k++) {
2900       sendbuf[k] = bv[i * blda + j];
2901     }
2902   }
2903   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2904   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRMPI(ierr);
2905   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2906   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2907   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
2908   if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2909   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
2910   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2911   ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
2912   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2913   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2914   PetscFunctionReturn(0);
2915 }
2916 
2917 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2918 {
2919   Mat_MatTransMultDense *abt;
2920   PetscErrorCode        ierr;
2921 
2922   PetscFunctionBegin;
2923   MatCheckProduct(C,3);
2924   PetscCheckFalse(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2925   abt = (Mat_MatTransMultDense*)C->product->data;
2926   switch (abt->alg) {
2927   case 1:
2928     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2929     break;
2930   default:
2931     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2932     break;
2933   }
2934   PetscFunctionReturn(0);
2935 }
2936 
2937 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2938 {
2939   PetscErrorCode   ierr;
2940   Mat_MatMultDense *ab = (Mat_MatMultDense*)data;
2941 
2942   PetscFunctionBegin;
2943   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2944   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2945   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2946   ierr = PetscFree(ab);CHKERRQ(ierr);
2947   PetscFunctionReturn(0);
2948 }
2949 
2950 #if defined(PETSC_HAVE_ELEMENTAL)
/* Numeric phase of C = A * B for MPIDENSE matrices via Elemental: refresh the
   cached Elemental copies of A and B (created in the symbolic phase and stored
   in the product data), run the Elemental GEMM, and convert the result back
   into the MPIDENSE matrix C in place. */
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode   ierr;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheckFalse(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
  ab   = (Mat_MatMultDense*)C->product->data;
  /* MAT_REUSE_MATRIX: copy the current values of A and B into the existing Elemental matrices */
  ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
  /* copy the Elemental product back into the caller's MPIDENSE matrix C */
  ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2966 
2967 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2968 {
2969   PetscErrorCode   ierr;
2970   Mat              Ae,Be,Ce;
2971   Mat_MatMultDense *ab;
2972 
2973   PetscFunctionBegin;
2974   MatCheckProduct(C,4);
2975   PetscCheckFalse(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2976   /* check local size of A and B */
2977   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2978     SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2979   }
2980 
2981   /* create elemental matrices Ae and Be */
2982   ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr);
2983   ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2984   ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr);
2985   ierr = MatSetUp(Ae);CHKERRQ(ierr);
2986   ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2987 
2988   ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr);
2989   ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
2990   ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
2991   ierr = MatSetUp(Be);CHKERRQ(ierr);
2992   ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2993 
2994   /* compute symbolic Ce = Ae*Be */
2995   ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr);
2996   ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr);
2997 
2998   /* setup C */
2999   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
3000   ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr);
3001   ierr = MatSetUp(C);CHKERRQ(ierr);
3002 
3003   /* create data structure for reuse Cdense */
3004   ierr = PetscNew(&ab);CHKERRQ(ierr);
3005   ab->Ae = Ae;
3006   ab->Be = Be;
3007   ab->Ce = Ce;
3008 
3009   C->product->data    = ab;
3010   C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
3011   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
3012   PetscFunctionReturn(0);
3013 }
3014 #endif
3015 /* ----------------------------------------------- */
3016 #if defined(PETSC_HAVE_ELEMENTAL)
3017 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
3018 {
3019   PetscFunctionBegin;
3020   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
3021   C->ops->productsymbolic = MatProductSymbolic_AB;
3022   PetscFunctionReturn(0);
3023 }
3024 #endif
3025 
3026 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
3027 {
3028   Mat_Product *product = C->product;
3029   Mat         A = product->A,B=product->B;
3030 
3031   PetscFunctionBegin;
3032   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
3033     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
3034   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
3035   C->ops->productsymbolic = MatProductSymbolic_AtB;
3036   PetscFunctionReturn(0);
3037 }
3038 
/* Install the symbolic callbacks for the MATPRODUCT_ABt case (C = A*B^T) and
   select the communication algorithm ("allgatherv" or "cyclic") from the
   options database. The option name depends on whether the product was
   requested through the legacy MatMatTransposeMult API or the MatProduct API. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;
  const char     *algTypes[2] = {"allgatherv","cyclic"};
  PetscInt       alg,nalg = 2;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  /* Get runtime option; api_user distinguishes the legacy MatMatTransposeMult entry point */
  if (product->api_user) {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  } else {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  }
  /* flg is set only if the user supplied the option explicitly */
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}
3073 
3074 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
3075 {
3076   PetscErrorCode ierr;
3077   Mat_Product    *product = C->product;
3078 
3079   PetscFunctionBegin;
3080   switch (product->type) {
3081 #if defined(PETSC_HAVE_ELEMENTAL)
3082   case MATPRODUCT_AB:
3083     ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
3084     break;
3085 #endif
3086   case MATPRODUCT_AtB:
3087     ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
3088     break;
3089   case MATPRODUCT_ABt:
3090     ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
3091     break;
3092   default:
3093     break;
3094   }
3095   PetscFunctionReturn(0);
3096 }
3097