xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 030f984af8d8bb4c203755d35bded3c05b3d83ce)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11 
12       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
13               matrix that represents the operator. For sequential matrices it returns itself.
14 
15     Input Parameter:
16 .      A - the Seq or MPI dense matrix
17 
18     Output Parameter:
19 .      B - the inner matrix
20 
21     Level: intermediate
22 
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25 {
26   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
27   PetscErrorCode ierr;
28   PetscBool      flg;
29 
30   PetscFunctionBegin;
31   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
32   PetscValidPointer(B,2);
33   ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
34   if (flg) *B = mat->A;
35   else {
36     ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg);CHKERRQ(ierr);
37     if (!flg) SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
38     *B = A;
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
44 {
45   Mat_MPIDense   *Amat = (Mat_MPIDense*)A->data;
46   Mat_MPIDense   *Bmat = (Mat_MPIDense*)B->data;
47   PetscErrorCode ierr;
48 
49   PetscFunctionBegin;
50   ierr = MatCopy(Amat->A,Bmat->A,s);CHKERRQ(ierr);
51   PetscFunctionReturn(0);
52 }
53 
54 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
55 {
56   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
57   PetscErrorCode ierr;
58   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
59 
60   PetscFunctionBegin;
61   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
62   lrow = row - rstart;
63   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
64   PetscFunctionReturn(0);
65 }
66 
67 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
68 {
69   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
70   PetscErrorCode ierr;
71   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
72 
73   PetscFunctionBegin;
74   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
75   lrow = row - rstart;
76   ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
77   PetscFunctionReturn(0);
78 }
79 
/* Returns the square diagonal block owned by this rank as a sequential dense
   matrix that ALIASES the storage of the local block (no copy).  The result is
   cached on A under the key "DiagonalBlock" so repeated calls are cheap. */
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  /* the diagonal block only makes sense when row and column layouts agree */
  ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr);
    if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    /* B shares storage with mdn->A starting at local column rstart (column-major).
       NOTE(review): the offset m*rstart assumes the local leading dimension equals m;
       confirm behavior when MatDenseSetLDA() has set lda != m. */
    ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* PetscObjectCompose takes a reference, so destroying B here keeps the cached copy alive */
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
113 
/* Inserts/adds a logically dense subblock of values.  Rows owned by this rank
   go directly into the local sequential block; off-process rows are queued in
   the stash and communicated during MatAssemblyBegin/End. */
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are silently skipped */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* local row index */
      if (roworiented) {
        /* row-oriented input: row i occupies v[i*n .. i*n+n-1] */
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* column-oriented input: entry (i,j) is v[i + j*m], so insert one at a time */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      /* off-process row: stash it; assembly is required before the matrix can be used */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
147 
/* Retrieves values one entry at a time from the local sequential block.
   Only locally owned rows may be queried; v is filled row-oriented
   (entry (i,j) at v[i*n+j]). */
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* translate to local row index */
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
169 
170 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
171 {
172   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
173   PetscErrorCode ierr;
174 
175   PetscFunctionBegin;
176   ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
177   PetscFunctionReturn(0);
178 }
179 
/* Sets the leading dimension of the local dense block.  If preallocation has
   not happened yet, the (empty) local block is created lazily here with the
   matching host/CUDA type so the lda can be recorded before any array is set. */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->A) {
    if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
    /* layouts must be final before sizing the local block */
    ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->A);CHKERRQ(ierr);
    /* local block holds all columns of the locally owned rows: n-local x N-global */
    ierr = MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
    ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  }
  ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
200 
201 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
202 {
203   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
204   PetscErrorCode ierr;
205 
206   PetscFunctionBegin;
207   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
208   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
209   PetscFunctionReturn(0);
210 }
211 
212 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
213 {
214   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
215   PetscErrorCode ierr;
216 
217   PetscFunctionBegin;
218   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
219   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
220   PetscFunctionReturn(0);
221 }
222 
223 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
224 {
225   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
226   PetscErrorCode ierr;
227 
228   PetscFunctionBegin;
229   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
230   ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr);
231   PetscFunctionReturn(0);
232 }
233 
234 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
235 {
236   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
237   PetscErrorCode ierr;
238 
239   PetscFunctionBegin;
240   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
241   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
242   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
243   PetscFunctionReturn(0);
244 }
245 
246 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
247 {
248   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
249   PetscErrorCode ierr;
250 
251   PetscFunctionBegin;
252   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
253   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
254   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
255   PetscFunctionReturn(0);
256 }
257 
258 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
259 {
260   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
261   PetscErrorCode ierr;
262 
263   PetscFunctionBegin;
264   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
265   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
266   ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr);
267   PetscFunctionReturn(0);
268 }
269 
/* Extracts the submatrix given by isrow (local rows) and iscol (parallel
   column IS, gathered to all ranks).  The submatrix keeps the caller's row
   partitioning; no parallel redistribution is performed. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
  PetscErrorCode    ierr;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  /* every rank needs all selected column indices since it stores all columns of its rows */
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time:
     source columns are strided by lda, the destination is filled contiguously */
  newmatd = (Mat_MPIDense*)newmat->data;
  ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr);
  for (i=0; i<Ncols; i++) {
    const PetscScalar *av = v + lda*icol[i]; /* start of selected source column */
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart]; /* irow holds global indices; shift to local */
    }
  }
  ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr);

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
339 
340 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
341 {
342   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
343   PetscErrorCode ierr;
344 
345   PetscFunctionBegin;
346   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
347   PetscFunctionReturn(0);
348 }
349 
350 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
351 {
352   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
353   PetscErrorCode ierr;
354 
355   PetscFunctionBegin;
356   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
357   PetscFunctionReturn(0);
358 }
359 
360 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
361 {
362   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
363   PetscErrorCode ierr;
364 
365   PetscFunctionBegin;
366   ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr);
367   PetscFunctionReturn(0);
368 }
369 
/* Starts communication of stashed off-process entries.  A no-op when stashing
   is disabled or the user guaranteed no off-process entries were set. */
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* launch the sends of stashed values to their owners; completed in MatAssemblyEnd */
  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
384 
/* Finishes assembly: drains the stash messages into the local block, assembles
   the local block, and on the first final assembly builds the communication
   structures needed by MatMult (via MatSetUpMultiply_MPIDense). */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break; /* no more incoming messages */

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* first final assembly: set up the scatter (lvec/Mvctx) used by the mult routines */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
423 
424 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
425 {
426   PetscErrorCode ierr;
427   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
428 
429   PetscFunctionBegin;
430   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
431   PetscFunctionReturn(0);
432 }
433 
/* Zeroes the locally owned rows among rows[] (global indices), placing diag on
   the diagonal; if x and b are given, b is adjusted so the zeroed rows satisfy
   b = diag*x. */
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr);
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr);
  }
  /* zero the selected local rows with a zero diagonal */
  ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (diag != 0.0) {
    Vec d;

    /* NOTE(review): this sets diag on the ENTIRE diagonal, not only on the
       zeroed rows -- confirm this is the intended semantics for dense matrices */
    ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
    ierr = VecSet(d,diag);CHKERRQ(ierr);
    ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecDestroy(&d);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
466 
467 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
468 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
469 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
470 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
471 
/* yy = mat*xx.  The entries of xx required by this rank are gathered into the
   work vector lvec through the Mvctx star forest (memtype-aware, so host and
   device arrays both work), then the local sequential block is applied. */
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  ierr = VecGetArrayReadAndMemType(xx,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr);
  /* MPI_REPLACE: lvec is overwritten with the gathered values of xx */
  ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(xx,&ax);CHKERRQ(ierr);
  /* local product with the sequential block */
  ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
490 
/* zz = yy + mat*xx.  Same gather-then-local-multiply pattern as
   MatMult_MPIDense, using the local block's multadd kernel. */
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  ierr = VecGetArrayReadAndMemType(xx,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr);
  /* MPI_REPLACE: lvec is overwritten with the gathered values of xx */
  ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
509 
/* yy = A^T xx.  Apply the local block's transpose into lvec, then sum the
   partial results from all ranks into yy with an SF reduce (MPIU_SUM). */
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  /* yy must start at zero since contributions are accumulated into it */
  ierr = VecSet(yy,0.0);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(yy,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(yy,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
529 
/* zz = yy + A^T xx.  Same as MatMultTranspose_MPIDense except the accumulator
   starts from yy (copied into zz) instead of zero. */
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  /* seed the accumulator with yy; the reduce below adds the partial products */
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(zz,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(zz,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
549 
550 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
551 {
552   Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
553   PetscErrorCode    ierr;
554   PetscInt          lda,len,i,n,m = A->rmap->n,radd;
555   PetscScalar       *x,zero = 0.0;
556   const PetscScalar *av;
557 
558   PetscFunctionBegin;
559   ierr = VecSet(v,zero);CHKERRQ(ierr);
560   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
561   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
562   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
563   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
564   radd = A->rmap->rstart*m;
565   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
566   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
567   for (i=0; i<len; i++) {
568     x[i] = av[radd + i*lda + i];
569   }
570   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
571   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
572   PetscFunctionReturn(0);
573 }
574 
/* Destroys a parallel dense matrix: releases the local block, the mult work
   vector and star forest, cached column vec/submatrix views, and clears every
   composed method so a subsequent MatSetType() starts clean. */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  /* destruction with live views would leave dangling references */
  if (mdn->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (mdn->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);

  /* remove all composed type-specific methods registered by MatCreate_MPIDense */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
#if defined (PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
634 
635 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
636 
637 #include <petscdraw.h>
638 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
639 {
640   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
641   PetscErrorCode    ierr;
642   PetscMPIInt       rank;
643   PetscViewerType   vtype;
644   PetscBool         iascii,isdraw;
645   PetscViewer       sviewer;
646   PetscViewerFormat format;
647 
648   PetscFunctionBegin;
649   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
650   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
651   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
652   if (iascii) {
653     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
654     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
655     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
656       MatInfo info;
657       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
658       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
659       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
660       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
661       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
662       ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
663       PetscFunctionReturn(0);
664     } else if (format == PETSC_VIEWER_ASCII_INFO) {
665       PetscFunctionReturn(0);
666     }
667   } else if (isdraw) {
668     PetscDraw draw;
669     PetscBool isnull;
670 
671     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
672     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
673     if (isnull) PetscFunctionReturn(0);
674   }
675 
676   {
677     /* assemble the entire matrix onto first processor. */
678     Mat         A;
679     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
680     PetscInt    *cols;
681     PetscScalar *vals;
682 
683     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
684     if (!rank) {
685       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
686     } else {
687       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
688     }
689     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
690     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
691     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
692     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
693 
694     /* Copy the matrix ... This isn't the most efficient means,
695        but it's quick for now */
696     A->insertmode = INSERT_VALUES;
697 
698     row = mat->rmap->rstart;
699     m   = mdn->A->rmap->n;
700     for (i=0; i<m; i++) {
701       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
702       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
703       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
704       row++;
705     }
706 
707     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
708     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
709     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
710     if (!rank) {
711       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
712       ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
713     }
714     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
715     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
716     ierr = MatDestroy(&A);CHKERRQ(ierr);
717   }
718   PetscFunctionReturn(0);
719 }
720 
721 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
722 {
723   PetscErrorCode ierr;
724   PetscBool      iascii,isbinary,isdraw,issocket;
725 
726   PetscFunctionBegin;
727   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
728   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
729   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
730   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
731 
732   if (iascii || issocket || isdraw) {
733     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
734   } else if (isbinary) {
735     ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
736   }
737   PetscFunctionReturn(0);
738 }
739 
740 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
741 {
742   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
743   Mat            mdn  = mat->A;
744   PetscErrorCode ierr;
745   PetscLogDouble isend[5],irecv[5];
746 
747   PetscFunctionBegin;
748   info->block_size = 1.0;
749 
750   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
751 
752   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
753   isend[3] = info->memory;  isend[4] = info->mallocs;
754   if (flag == MAT_LOCAL) {
755     info->nz_used      = isend[0];
756     info->nz_allocated = isend[1];
757     info->nz_unneeded  = isend[2];
758     info->memory       = isend[3];
759     info->mallocs      = isend[4];
760   } else if (flag == MAT_GLOBAL_MAX) {
761     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
762 
763     info->nz_used      = irecv[0];
764     info->nz_allocated = irecv[1];
765     info->nz_unneeded  = irecv[2];
766     info->memory       = irecv[3];
767     info->mallocs      = irecv[4];
768   } else if (flag == MAT_GLOBAL_SUM) {
769     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
770 
771     info->nz_used      = irecv[0];
772     info->nz_allocated = irecv[1];
773     info->nz_unneeded  = irecv[2];
774     info->memory       = irecv[3];
775     info->mallocs      = irecv[4];
776   }
777   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
778   info->fill_ratio_needed = 0;
779   info->factor_mallocs    = 0;
780   PetscFunctionReturn(0);
781 }
782 
783 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
784 {
785   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
786   PetscErrorCode ierr;
787 
788   PetscFunctionBegin;
789   switch (op) {
790   case MAT_NEW_NONZERO_LOCATIONS:
791   case MAT_NEW_NONZERO_LOCATION_ERR:
792   case MAT_NEW_NONZERO_ALLOCATION_ERR:
793     MatCheckPreallocated(A,1);
794     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
795     break;
796   case MAT_ROW_ORIENTED:
797     MatCheckPreallocated(A,1);
798     a->roworiented = flg;
799     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
800     break;
801   case MAT_FORCE_DIAGONAL_ENTRIES:
802   case MAT_KEEP_NONZERO_PATTERN:
803   case MAT_USE_HASH_TABLE:
804   case MAT_SORTED_FULL:
805     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
806     break;
807   case MAT_IGNORE_OFF_PROC_ENTRIES:
808     a->donotstash = flg;
809     break;
810   case MAT_SYMMETRIC:
811   case MAT_STRUCTURALLY_SYMMETRIC:
812   case MAT_HERMITIAN:
813   case MAT_SYMMETRY_ETERNAL:
814   case MAT_IGNORE_LOWER_TRIANGULAR:
815   case MAT_IGNORE_ZERO_ENTRIES:
816     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
817     break;
818   default:
819     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
820   }
821   PetscFunctionReturn(0);
822 }
823 
824 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
825 {
826   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
827   const PetscScalar *l;
828   PetscScalar       x,*v,*vv,*r;
829   PetscErrorCode    ierr;
830   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;
831 
832   PetscFunctionBegin;
833   ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
834   ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
835   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
836   if (ll) {
837     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
838     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2);
839     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
840     for (i=0; i<m; i++) {
841       x = l[i];
842       v = vv + i;
843       for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
844     }
845     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
846     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
847   }
848   if (rr) {
849     const PetscScalar *ar;
850 
851     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
852     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
853     ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
854     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
855     ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE);CHKERRQ(ierr);
856     ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE);CHKERRQ(ierr);
857     ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr);
858     for (i=0; i<n; i++) {
859       x = r[i];
860       v = vv + i*lda;
861       for (j=0; j<m; j++) (*v++) *= x;
862     }
863     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
864     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
865   }
866   ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr);
867   PetscFunctionReturn(0);
868 }
869 
870 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
871 {
872   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
873   PetscErrorCode    ierr;
874   PetscInt          i,j;
875   PetscMPIInt       size;
876   PetscReal         sum = 0.0;
877   const PetscScalar *av,*v;
878 
879   PetscFunctionBegin;
880   ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr);
881   v    = av;
882   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
883   if (size == 1) {
884     ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
885   } else {
886     if (type == NORM_FROBENIUS) {
887       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
888         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
889       }
890       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
891       *nrm = PetscSqrtReal(*nrm);
892       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
893     } else if (type == NORM_1) {
894       PetscReal *tmp,*tmp2;
895       ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
896       *nrm = 0.0;
897       v    = av;
898       for (j=0; j<mdn->A->cmap->n; j++) {
899         for (i=0; i<mdn->A->rmap->n; i++) {
900           tmp[j] += PetscAbsScalar(*v);  v++;
901         }
902       }
903       ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
904       for (j=0; j<A->cmap->N; j++) {
905         if (tmp2[j] > *nrm) *nrm = tmp2[j];
906       }
907       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
908       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
909     } else if (type == NORM_INFINITY) { /* max row norm */
910       PetscReal ntemp;
911       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
912       ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
913     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
914   }
915   ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
916   PetscFunctionReturn(0);
917 }
918 
919 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
920 {
921   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
922   Mat            B;
923   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
924   PetscErrorCode ierr;
925   PetscInt       j,i,lda;
926   PetscScalar    *v;
927 
928   PetscFunctionBegin;
929   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
930     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
931     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
932     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
933     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
934   } else B = *matout;
935 
936   m    = a->A->rmap->n; n = a->A->cmap->n;
937   ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
938   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
939   ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
940   for (i=0; i<m; i++) rwork[i] = rstart + i;
941   for (j=0; j<n; j++) {
942     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
943     v   += lda;
944   }
945   ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
946   ierr = PetscFree(rwork);CHKERRQ(ierr);
947   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
948   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
949   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
950     *matout = B;
951   } else {
952     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
953   }
954   PetscFunctionReturn(0);
955 }
956 
957 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
958 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
959 
960 PetscErrorCode MatSetUp_MPIDense(Mat A)
961 {
962   PetscErrorCode ierr;
963 
964   PetscFunctionBegin;
965   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
966   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
967   if (!A->preallocated) {
968     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
969   }
970   PetscFunctionReturn(0);
971 }
972 
973 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
974 {
975   PetscErrorCode ierr;
976   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
977 
978   PetscFunctionBegin;
979   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
980   PetscFunctionReturn(0);
981 }
982 
983 PetscErrorCode MatConjugate_MPIDense(Mat mat)
984 {
985   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
986   PetscErrorCode ierr;
987 
988   PetscFunctionBegin;
989   ierr = MatConjugate(a->A);CHKERRQ(ierr);
990   PetscFunctionReturn(0);
991 }
992 
993 PetscErrorCode MatRealPart_MPIDense(Mat A)
994 {
995   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
996   PetscErrorCode ierr;
997 
998   PetscFunctionBegin;
999   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1004 {
1005   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1006   PetscErrorCode ierr;
1007 
1008   PetscFunctionBegin;
1009   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1010   PetscFunctionReturn(0);
1011 }
1012 
1013 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
1014 {
1015   PetscErrorCode ierr;
1016   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1017 
1018   PetscFunctionBegin;
1019   if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
1020   if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
1021   ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
1022   PetscFunctionReturn(0);
1023 }
1024 
1025 PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1026 
1027 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1028 {
1029   PetscErrorCode ierr;
1030   PetscInt       i,n;
1031   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1032   PetscReal      *work;
1033 
1034   PetscFunctionBegin;
1035   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1036   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1037   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1038   if (type == NORM_2) {
1039     for (i=0; i<n; i++) work[i] *= work[i];
1040   }
1041   if (type == NORM_INFINITY) {
1042     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRMPI(ierr);
1043   } else {
1044     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRMPI(ierr);
1045   }
1046   ierr = PetscFree(work);CHKERRQ(ierr);
1047   if (type == NORM_2) {
1048     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1049   }
1050   PetscFunctionReturn(0);
1051 }
1052 
1053 #if defined(PETSC_HAVE_CUDA)
/* Expose column `col` of the matrix as an MPI CUDA vector for read/write
   access.  The cached vector a->cvec is created lazily (with a NULL array)
   and its device array is pointed at column `col` of the local block;
   the caller must pair this with MatDenseRestoreColumnVec().  Only one
   column vector or submatrix view may be outstanding at a time. */
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the reusable wrapper vector; the array is placed below */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* 1-based so 0 means "not in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1074 
1075 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1076 {
1077   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1078   PetscErrorCode ierr;
1079 
1080   PetscFunctionBegin;
1081   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1082   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1083   a->vecinuse = 0;
1084   ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1085   ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
1086   *v   = NULL;
1087   PetscFunctionReturn(0);
1088 }
1089 
/* Read-only variant of MatDenseGetColumnVec_MPIDenseCUDA(): places the
   device array at column `col` and additionally locks the cached vector
   for reading; pair with MatDenseRestoreColumnVecRead(). */
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the reusable wrapper vector; the array is placed below */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* 1-based so 0 means "not in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1111 
/* Release a read-only column vector: return the device array, pop the
   read lock, detach the array from the cached vector, and clear the
   in-use marker. */
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1127 
/* Write-only variant of MatDenseGetColumnVec_MPIDenseCUDA(): obtains the
   device array with write-only semantics and places it at column `col`;
   pair with MatDenseRestoreColumnVecWrite(). */
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the reusable wrapper vector; the array is placed below */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* 1-based so 0 means "not in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1148 
/* Release a write-only column vector: return the device array with
   write semantics, detach it from the cached vector, and clear the
   in-use marker. */
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1163 
1164 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1165 {
1166   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1167   PetscErrorCode ierr;
1168 
1169   PetscFunctionBegin;
1170   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1171   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1172   ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1177 {
1178   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1179   PetscErrorCode ierr;
1180 
1181   PetscFunctionBegin;
1182   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1183   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1184   ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1189 {
1190   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1191   PetscErrorCode ierr;
1192 
1193   PetscFunctionBegin;
1194   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1195   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1196   ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr);
1197   PetscFunctionReturn(0);
1198 }
1199 
1200 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1201 {
1202   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1203   PetscErrorCode ierr;
1204 
1205   PetscFunctionBegin;
1206   ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
1207   PetscFunctionReturn(0);
1208 }
1209 
1210 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1211 {
1212   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1213   PetscErrorCode ierr;
1214 
1215   PetscFunctionBegin;
1216   ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
1217   PetscFunctionReturn(0);
1218 }
1219 
1220 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1221 {
1222   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1223   PetscErrorCode ierr;
1224 
1225   PetscFunctionBegin;
1226   ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
1227   PetscFunctionReturn(0);
1228 }
1229 
1230 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1231 {
1232   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1233   PetscErrorCode ierr;
1234 
1235   PetscFunctionBegin;
1236   ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1241 {
1242   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1243   PetscErrorCode ierr;
1244 
1245   PetscFunctionBegin;
1246   ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
1247   PetscFunctionReturn(0);
1248 }
1249 
1250 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1251 {
1252   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1253   PetscErrorCode ierr;
1254 
1255   PetscFunctionBegin;
1256   ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1261 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1262 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
1263 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1264 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1265 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
1266 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);
1267 
1268 static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
1269 {
1270   Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
1271   PetscErrorCode ierr;
1272 
1273   PetscFunctionBegin;
1274   if (d->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1275   if (d->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1276   if (d->A) {
1277     ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
1278   }
1279   mat->boundtocpu = bind;
1280   if (!bind) {
1281     PetscBool iscuda;
1282 
1283     ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
1284     if (!iscuda) {
1285       ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
1286     }
1287     ierr = PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
1288     if (!iscuda) {
1289       ierr = MatDestroy(&d->cmat);CHKERRQ(ierr);
1290     }
1291     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
1292     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
1293     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
1294     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
1295     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
1296     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
1297   } else {
1298     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
1299     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
1300     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
1301     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
1302     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
1303     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
1304   }
1305   if (d->cmat) {
1306     ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr);
1307   }
1308   PetscFunctionReturn(0);
1309 }
1310 
/* Preallocate the local block of a MATMPIDENSECUDA matrix, optionally
   wrapping the user-supplied device array d_data (NULL means PETSc
   allocates).  A no-op for matrices of any other type.  The local block
   is m x N (all global columns on every process). */
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  if (!iscuda) PetscFunctionReturn(0);
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!d->A) {
    /* lazily create the sequential local block */
    ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
    ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
  }
  ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
  ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1333 #endif
1334 
1335 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1336 {
1337   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1338   PetscErrorCode ierr;
1339 
1340   PetscFunctionBegin;
1341   ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
1342   PetscFunctionReturn(0);
1343 }
1344 
/* A dense matrix stores every entry, so a diagonal entry is never
   structurally missing; *d is left untouched. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1351 
1352 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1353 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1354 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1355 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1356 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
1357 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);
1358 
1359 /* -------------------------------------------------------------------*/
1360 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1361                                         MatGetRow_MPIDense,
1362                                         MatRestoreRow_MPIDense,
1363                                         MatMult_MPIDense,
1364                                 /*  4*/ MatMultAdd_MPIDense,
1365                                         MatMultTranspose_MPIDense,
1366                                         MatMultTransposeAdd_MPIDense,
1367                                         NULL,
1368                                         NULL,
1369                                         NULL,
1370                                 /* 10*/ NULL,
1371                                         NULL,
1372                                         NULL,
1373                                         NULL,
1374                                         MatTranspose_MPIDense,
1375                                 /* 15*/ MatGetInfo_MPIDense,
1376                                         MatEqual_MPIDense,
1377                                         MatGetDiagonal_MPIDense,
1378                                         MatDiagonalScale_MPIDense,
1379                                         MatNorm_MPIDense,
1380                                 /* 20*/ MatAssemblyBegin_MPIDense,
1381                                         MatAssemblyEnd_MPIDense,
1382                                         MatSetOption_MPIDense,
1383                                         MatZeroEntries_MPIDense,
1384                                 /* 24*/ MatZeroRows_MPIDense,
1385                                         NULL,
1386                                         NULL,
1387                                         NULL,
1388                                         NULL,
1389                                 /* 29*/ MatSetUp_MPIDense,
1390                                         NULL,
1391                                         NULL,
1392                                         MatGetDiagonalBlock_MPIDense,
1393                                         NULL,
1394                                 /* 34*/ MatDuplicate_MPIDense,
1395                                         NULL,
1396                                         NULL,
1397                                         NULL,
1398                                         NULL,
1399                                 /* 39*/ MatAXPY_MPIDense,
1400                                         MatCreateSubMatrices_MPIDense,
1401                                         NULL,
1402                                         MatGetValues_MPIDense,
1403                                         MatCopy_MPIDense,
1404                                 /* 44*/ NULL,
1405                                         MatScale_MPIDense,
1406                                         MatShift_Basic,
1407                                         NULL,
1408                                         NULL,
1409                                 /* 49*/ MatSetRandom_MPIDense,
1410                                         NULL,
1411                                         NULL,
1412                                         NULL,
1413                                         NULL,
1414                                 /* 54*/ NULL,
1415                                         NULL,
1416                                         NULL,
1417                                         NULL,
1418                                         NULL,
1419                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1420                                         MatDestroy_MPIDense,
1421                                         MatView_MPIDense,
1422                                         NULL,
1423                                         NULL,
1424                                 /* 64*/ NULL,
1425                                         NULL,
1426                                         NULL,
1427                                         NULL,
1428                                         NULL,
1429                                 /* 69*/ NULL,
1430                                         NULL,
1431                                         NULL,
1432                                         NULL,
1433                                         NULL,
1434                                 /* 74*/ NULL,
1435                                         NULL,
1436                                         NULL,
1437                                         NULL,
1438                                         NULL,
1439                                 /* 79*/ NULL,
1440                                         NULL,
1441                                         NULL,
1442                                         NULL,
1443                                 /* 83*/ MatLoad_MPIDense,
1444                                         NULL,
1445                                         NULL,
1446                                         NULL,
1447                                         NULL,
1448                                         NULL,
1449                                 /* 89*/ NULL,
1450                                         NULL,
1451                                         NULL,
1452                                         NULL,
1453                                         NULL,
1454                                 /* 94*/ NULL,
1455                                         NULL,
1456                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1457                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1458                                         NULL,
1459                                 /* 99*/ MatProductSetFromOptions_MPIDense,
1460                                         NULL,
1461                                         NULL,
1462                                         MatConjugate_MPIDense,
1463                                         NULL,
1464                                 /*104*/ NULL,
1465                                         MatRealPart_MPIDense,
1466                                         MatImaginaryPart_MPIDense,
1467                                         NULL,
1468                                         NULL,
1469                                 /*109*/ NULL,
1470                                         NULL,
1471                                         NULL,
1472                                         MatGetColumnVector_MPIDense,
1473                                         MatMissingDiagonal_MPIDense,
1474                                 /*114*/ NULL,
1475                                         NULL,
1476                                         NULL,
1477                                         NULL,
1478                                         NULL,
1479                                 /*119*/ NULL,
1480                                         NULL,
1481                                         NULL,
1482                                         NULL,
1483                                         NULL,
1484                                 /*124*/ NULL,
1485                                         MatGetColumnNorms_MPIDense,
1486                                         NULL,
1487                                         NULL,
1488                                         NULL,
1489                                 /*129*/ NULL,
1490                                         NULL,
1491                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1492                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1493                                         NULL,
1494                                 /*134*/ NULL,
1495                                         NULL,
1496                                         NULL,
1497                                         NULL,
1498                                         NULL,
1499                                 /*139*/ NULL,
1500                                         NULL,
1501                                         NULL,
1502                                         NULL,
1503                                         NULL,
1504                                         MatCreateMPIMatConcatenateSeqMat_MPIDense,
1505                                 /*145*/ NULL,
1506                                         NULL,
1507                                         NULL
1508 };
1509 
/*
   Type-specific preallocation: creates (or reuses) the inner sequential dense
   matrix a->A that stores all columns of this rank's locally owned rows.

   Input:
     mat  - the MPI dense matrix being preallocated
     data - optional user-provided array for the local values; NULL lets PETSc allocate
*/
PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* storage cannot change while a submatrix view of it is checked out */
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  if (!a->A) {
    /* local piece is rmap->n by cmap->N: all columns of the owned rows */
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
    ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  }
  /* match the inner sequential type to the outer type (CUDA vs host) */
  ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  mat->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1531 
/*
   Convert a parallel AIJ matrix to parallel dense: gather this rank's rows as a
   sequential AIJ matrix, densify them, then concatenate the per-rank pieces
   into one MPI dense matrix. Supports all three MatReuse modes.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            B,C;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* C: sequential AIJ matrix holding all columns of the locally owned rows */
  ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatConvert_SeqAIJ_SeqDense(C,MATSEQDENSE,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
  ierr = MatDestroy(&C);CHKERRQ(ierr);
  /* reuse the caller's output matrix when available; otherwise a new one is created below */
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  ierr = MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&C);CHKERRQ(ierr);
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(0);
}
1551 
/*
   Convert a parallel dense matrix to parallel AIJ: sparsify the inner sequential
   dense matrix, then concatenate the per-rank pieces. Supports all three
   MatReuse modes; mirrors MatConvert_MPIAIJ_MPIDense above.
*/
PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            B,C;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* C first aliases the inner sequential matrix of A (no copy, no restore needed) */
  ierr = MatDenseGetLocalMatrix(A,&C);CHKERRQ(ierr);
  ierr = MatConvert_SeqDense_SeqAIJ(C,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
  /* C is then repurposed as the output matrix */
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  ierr = MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&C);CHKERRQ(ierr);
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(0);
}
1570 
#if defined(PETSC_HAVE_ELEMENTAL)
/*
   Convert a parallel dense matrix to the Elemental format by inserting every
   column of the locally owned rows into a new (or reused) MATELEMENTAL matrix.
   Supports MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX and MAT_INPLACE_MATRIX.

   Fix: A is only read here, so check the values out with the read-only
   accessor instead of MatDenseGetArray(), which requests write access.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat               mat_elemental;
  PetscErrorCode    ierr;
  const PetscScalar *v;
  PetscInt          m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    /* values below are provided in column-major (dense) order */
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArrayRead(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif
1611 
/* Forward MatDenseGetColumn() to the inner sequential matrix; local rows only. */
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1621 
/* Forward MatDenseRestoreColumn() to the inner sequential matrix. */
static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1631 
1632 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1633 {
1634   PetscErrorCode ierr;
1635   Mat_MPIDense   *mat;
1636   PetscInt       m,nloc,N;
1637 
1638   PetscFunctionBegin;
1639   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
1640   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
1641   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1642     PetscInt sum;
1643 
1644     if (n == PETSC_DECIDE) {
1645       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1646     }
1647     /* Check sum(n) = N */
1648     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
1649     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1650 
1651     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
1652     ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
1653   }
1654 
1655   /* numeric phase */
1656   mat = (Mat_MPIDense*)(*outmat)->data;
1657   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1658   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1659   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1660   PetscFunctionReturn(0);
1661 }
1662 
1663 #if defined(PETSC_HAVE_CUDA)
/*
   Convert an MPI dense CUDA matrix back to the host MPI dense type:
   rebind to the CPU, reset the default vector type, strip the CUDA-only
   composed methods, and convert the inner sequential matrix to MATSEQDENSE.
*/
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* for MAT_INPLACE_MATRIX, B below is M itself */

  B    = *newmat;
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
  /* remove the CUDA-specific composed methods registered by the reverse conversion */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* convert the inner sequential matrix to the host type as well */
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}
1705 
/*
   Convert a host MPI dense matrix to the CUDA type: switch the default vector
   type to VECCUDA, register the CUDA-specific composed methods, convert the
   inner sequential matrix to MATSEQDENSECUDA, and enable GPU binding.
*/
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* for MAT_INPLACE_MATRIX, B below is M itself */

  B    = *newmat;
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  /* register the CUDA-specific implementations; removed again by the reverse conversion */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B->data);
  if (m->A) {
    ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    /* no inner matrix yet (not preallocated); nothing resides on host or device */
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
1750 #endif
1751 
/*
   Expose column col of the parallel dense matrix as an MPI vector that shares
   the matrix storage (read/write, no copy). Must be matched by a call to
   MatDenseRestoreColumnVec(); only one column may be checked out at a time.
*/
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create a reusable wrapper vector with no backing array of its own */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* +1 so that 0 means "no column checked out" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  /* point the wrapper at the start of the requested column (column-major, leading dimension lda) */
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1771 
1772 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1773 {
1774   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1775   PetscErrorCode ierr;
1776 
1777   PetscFunctionBegin;
1778   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1779   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1780   a->vecinuse = 0;
1781   ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1782   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1783   *v   = NULL;
1784   PetscFunctionReturn(0);
1785 }
1786 
/*
   Read-only variant of MatDenseGetColumnVec_MPIDense(): the returned vector is
   locked against writes. Must be matched by MatDenseRestoreColumnVecRead().
*/
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create a reusable wrapper vector with no backing array of its own */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* +1 so that 0 means "no column checked out" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  /* forbid writes through the returned vector */
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1807 
1808 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1809 {
1810   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1811   PetscErrorCode ierr;
1812 
1813   PetscFunctionBegin;
1814   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1815   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1816   a->vecinuse = 0;
1817   ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1818   ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
1819   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1820   *v   = NULL;
1821   PetscFunctionReturn(0);
1822 }
1823 
/*
   Write-only variant of MatDenseGetColumnVec_MPIDense(): the current column
   values need not be valid on return. Must be matched by
   MatDenseRestoreColumnVecWrite().
*/
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create a reusable wrapper vector with no backing array of its own */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* +1 so that 0 means "no column checked out" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1843 
1844 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1845 {
1846   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1847   PetscErrorCode ierr;
1848 
1849   PetscFunctionBegin;
1850   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1851   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1852   a->vecinuse = 0;
1853   ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1854   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1855   *v   = NULL;
1856   PetscFunctionReturn(0);
1857 }
1858 
/*
   Expose columns [cbegin,cend) of the parallel dense matrix as a matrix that
   shares the storage (no copy). The view a->cmat is created lazily and reused;
   its column layout is rebuilt when the requested width changes. Must be
   matched by MatDenseRestoreSubMatrix(); only one view may be out at a time.
*/
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscBool      setup = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (a->vecinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cmat) {
    /* first use: create the view matrix sharing A's row layout */
    setup = PETSC_TRUE;
    ierr = MatCreate(comm,&a->cmat);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr);
    ierr = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  } else if (cend-cbegin != a->cmat->cmap->N) {
    /* different width than last time: rebuild only the column layout */
    setup = PETSC_TRUE;
    ierr = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  }
  c = (Mat_MPIDense*)a->cmat->data;
  if (c->A) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  /* the inner sequential matrix of the view aliases A's inner matrix columns */
  ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr);
  if (setup) { /* do we really need this? */
    ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr);
  }
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled = PETSC_TRUE;
  a->matinuse = cbegin + 1;  /* +1 so that 0 means "no view checked out" */
  *v = a->cmat;
  PetscFunctionReturn(0);
}
1898 
/* Release the column-range view obtained with MatDenseGetSubMatrix(). */
PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
  if (!a->cmat) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
  if (*v != a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c    = (Mat_MPIDense*)a->cmat->data;
  /* give the aliased columns back to the inner sequential matrix of A */
  ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1915 
1916 /*MC
1917    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1918 
1919    Options Database Keys:
1920 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1921 
1922   Level: beginner
1923 
1924 .seealso: MatCreateDense()
1925 
1926 M*/
/*
   Constructor for MATMPIDENSE: allocates the type-specific data, installs the
   function table, and registers all composed (dispatch-by-name) methods.
   Preallocation (creation of the inner sequential matrix) happens later, in
   MatMPIDenseSetPreallocation_MPIDense().
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* dense-array and column-view accessors */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr);
  /* conversions to/from other matrix types */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",MatConvert_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  /* preallocation and matrix-product hooks */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#endif

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1992 
1993 /*MC
1994    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1995 
1996    Options Database Keys:
1997 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1998 
1999   Level: beginner
2000 
.seealso: MATDENSECUDA, MATMPIDENSE, MATSEQDENSECUDA
2002 
2003 M*/
2004 #if defined(PETSC_HAVE_CUDA)
/*
   Constructor for MATMPIDENSECUDA: builds a host MPI dense matrix and then
   converts it in place to the CUDA type.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* make sure the CUDA runtime is usable before committing to the GPU type */
  ierr = PetscCUDAInitializeCheck();CHKERRQ(ierr);
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2015 #endif
2016 
2017 /*MC
2018    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
2019 
2020    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
2021    and MATMPIDENSE otherwise.
2022 
2023    Options Database Keys:
2024 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
2025 
2026   Level: beginner
2027 
2028 .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
2029 M*/
2030 
2031 /*MC
2032    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
2033 
2034    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
2035    and MATMPIDENSECUDA otherwise.
2036 
2037    Options Database Keys:
2038 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
2039 
2040   Level: beginner
2041 
2042 .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
2043 M*/
2044 
2045 /*@C
2046    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
2047 
2048    Collective
2049 
2050    Input Parameters:
+  B - the matrix
2052 -  data - optional location of matrix data.  Set data=NULL for PETSc
2053    to control all matrix memory allocation.
2054 
2055    Notes:
2056    The dense format is fully compatible with standard Fortran 77
2057    storage by columns.
2058 
2059    The data input variable is intended primarily for Fortran programmers
2060    who wish to allocate their own matrix memory space.  Most users should
2061    set data=NULL.
2062 
2063    Level: intermediate
2064 
2065 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
2066 @*/
2067 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
2068 {
2069   PetscErrorCode ierr;
2070 
2071   PetscFunctionBegin;
2072   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2073   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
2074   PetscFunctionReturn(0);
2075 }
2076 
2077 /*@
2078    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
2079    array provided by the user. This is useful to avoid copying an array
2080    into a matrix
2081 
2082    Not Collective
2083 
2084    Input Parameters:
2085 +  mat - the matrix
2086 -  array - the array in column major order
2087 
2088    Notes:
2089    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
2090    freed when the matrix is destroyed.
2091 
2092    Level: developer
2093 
2094 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
2095 
2096 @*/
2097 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
2098 {
2099   PetscErrorCode ierr;
2100 
2101   PetscFunctionBegin;
2102   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2103   ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2104   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2105 #if defined(PETSC_HAVE_CUDA)
2106   mat->offloadmask = PETSC_OFFLOAD_CPU;
2107 #endif
2108   PetscFunctionReturn(0);
2109 }
2110 
2111 /*@
   MatDenseResetArray - Resets the matrix array to the array it previously had before the call to MatDensePlaceArray()
2113 
2114    Not Collective
2115 
2116    Input Parameters:
2117 .  mat - the matrix
2118 
2119    Notes:
2120    You can only call this after a call to MatDensePlaceArray()
2121 
2122    Level: developer
2123 
2124 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
2125 
2126 @*/
2127 PetscErrorCode  MatDenseResetArray(Mat mat)
2128 {
2129   PetscErrorCode ierr;
2130 
2131   PetscFunctionBegin;
2132   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2133   ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
2134   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2135   PetscFunctionReturn(0);
2136 }
2137 
2138 /*@
2139    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2140    array provided by the user. This is useful to avoid copying an array
2141    into a matrix
2142 
2143    Not Collective
2144 
2145    Input Parameters:
2146 +  mat - the matrix
2147 -  array - the array in column major order
2148 
2149    Notes:
2150    The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
2151    freed by the user. It will be freed when the matrix is destroyed.
2152 
2153    Level: developer
2154 
2155 .seealso: MatDenseGetArray(), VecReplaceArray()
2156 @*/
2157 PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
2158 {
2159   PetscErrorCode ierr;
2160 
2161   PetscFunctionBegin;
2162   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2163   ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2164   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2165 #if defined(PETSC_HAVE_CUDA)
2166   mat->offloadmask = PETSC_OFFLOAD_CPU;
2167 #endif
2168   PetscFunctionReturn(0);
2169 }
2170 
2171 #if defined(PETSC_HAVE_CUDA)
2172 /*@C
2173    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
2174    array provided by the user. This is useful to avoid copying an array
2175    into a matrix
2176 
2177    Not Collective
2178 
2179    Input Parameters:
2180 +  mat - the matrix
2181 -  array - the array in column major order
2182 
2183    Notes:
2184    You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
2185    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
2186 
2187    Level: developer
2188 
2189 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
2190 @*/
2191 PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
2192 {
2193   PetscErrorCode ierr;
2194 
2195   PetscFunctionBegin;
2196   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2197   ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2198   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2199   mat->offloadmask = PETSC_OFFLOAD_GPU;
2200   PetscFunctionReturn(0);
2201 }
2202 
2203 /*@C
   MatDenseCUDAResetArray - Resets the matrix array to the array it previously had before the call to MatDenseCUDAPlaceArray()
2205 
2206    Not Collective
2207 
2208    Input Parameters:
2209 .  mat - the matrix
2210 
2211    Notes:
2212    You can only call this after a call to MatDenseCUDAPlaceArray()
2213 
2214    Level: developer
2215 
2216 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()
2217 
2218 @*/
2219 PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
2220 {
2221   PetscErrorCode ierr;
2222 
2223   PetscFunctionBegin;
2224   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2225   ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
2226   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2227   PetscFunctionReturn(0);
2228 }
2229 
2230 /*@C
2231    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
2232    array provided by the user. This is useful to avoid copying an array
2233    into a matrix
2234 
2235    Not Collective
2236 
2237    Input Parameters:
2238 +  mat - the matrix
2239 -  array - the array in column major order
2240 
2241    Notes:
2242    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2243    The memory passed in CANNOT be freed by the user. It will be freed
2244    when the matrix is destroyed. The array should respect the matrix leading dimension.
2245 
2246    Level: developer
2247 
2248 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
2249 @*/
2250 PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
2251 {
2252   PetscErrorCode ierr;
2253 
2254   PetscFunctionBegin;
2255   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2256   ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2257   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2258   mat->offloadmask = PETSC_OFFLOAD_GPU;
2259   PetscFunctionReturn(0);
2260 }
2261 
2262 /*@C
2263    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.
2264 
2265    Not Collective
2266 
2267    Input Parameters:
2268 .  A - the matrix
2269 
   Output Parameter:
2271 .  array - the GPU array in column major order
2272 
2273    Notes:
2274    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
2275 
2276    Level: developer
2277 
2278 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
2279 @*/
2280 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
2281 {
2282   PetscErrorCode ierr;
2283 
2284   PetscFunctionBegin;
2285   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2286   ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2287   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2288   PetscFunctionReturn(0);
2289 }
2290 
2291 /*@C
2292    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().
2293 
2294    Not Collective
2295 
2296    Input Parameters:
2297 +  A - the matrix
2298 -  array - the GPU array in column major order
2299 
2300    Notes:
2301 
2302    Level: developer
2303 
2304 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2305 @*/
2306 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
2307 {
2308   PetscErrorCode ierr;
2309 
2310   PetscFunctionBegin;
2311   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2312   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2313   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2314   A->offloadmask = PETSC_OFFLOAD_GPU;
2315   PetscFunctionReturn(0);
2316 }
2317 
2318 /*@C
2319    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.
2320 
2321    Not Collective
2322 
2323    Input Parameters:
2324 .  A - the matrix
2325 
   Output Parameter:
2327 .  array - the GPU array in column major order
2328 
2329    Notes:
2330    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2331 
2332    Level: developer
2333 
2334 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2335 @*/
2336 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
2337 {
2338   PetscErrorCode ierr;
2339 
2340   PetscFunctionBegin;
2341   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2342   ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2343   PetscFunctionReturn(0);
2344 }
2345 
2346 /*@C
2347    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2348 
2349    Not Collective
2350 
2351    Input Parameters:
2352 +  A - the matrix
2353 -  array - the GPU array in column major order
2354 
2355    Notes:
2356    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2357 
2358    Level: developer
2359 
2360 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
2361 @*/
2362 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2363 {
2364   PetscErrorCode ierr;
2365 
2366   PetscFunctionBegin;
2367   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2368   PetscFunctionReturn(0);
2369 }
2370 
2371 /*@C
2372    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.
2373 
2374    Not Collective
2375 
2376    Input Parameters:
2377 .  A - the matrix
2378 
   Output Parameter:
2380 .  array - the GPU array in column major order
2381 
2382    Notes:
2383    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().
2384 
2385    Level: developer
2386 
2387 .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2388 @*/
2389 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
2390 {
2391   PetscErrorCode ierr;
2392 
2393   PetscFunctionBegin;
2394   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2395   ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2396   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2397   PetscFunctionReturn(0);
2398 }
2399 
2400 /*@C
2401    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().
2402 
2403    Not Collective
2404 
2405    Input Parameters:
2406 +  A - the matrix
2407 -  array - the GPU array in column major order
2408 
2409    Notes:
2410 
2411    Level: developer
2412 
2413 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2414 @*/
2415 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
2416 {
2417   PetscErrorCode ierr;
2418 
2419   PetscFunctionBegin;
2420   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2421   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2422   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2423   A->offloadmask = PETSC_OFFLOAD_GPU;
2424   PetscFunctionReturn(0);
2425 }
2426 #endif
2427 
2428 /*@C
2429    MatCreateDense - Creates a matrix in dense format.
2430 
2431    Collective
2432 
2433    Input Parameters:
2434 +  comm - MPI communicator
2435 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2436 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2437 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2438 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2439 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2440    to control all matrix memory allocation.
2441 
2442    Output Parameter:
2443 .  A - the matrix
2444 
2445    Notes:
2446    The dense format is fully compatible with standard Fortran 77
2447    storage by columns.
2448    Note that, although local portions of the matrix are stored in column-major
2449    order, the matrix is partitioned across MPI ranks by row.
2450 
2451    The data input variable is intended primarily for Fortran programmers
2452    who wish to allocate their own matrix memory space.  Most users should
2453    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
2454 
2455    The user MUST specify either the local or global matrix dimensions
2456    (possibly both).
2457 
2458    Level: intermediate
2459 
2460 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
2461 @*/
2462 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2463 {
2464   PetscErrorCode ierr;
2465   PetscMPIInt    size;
2466 
2467   PetscFunctionBegin;
2468   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2469   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2470   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
2471   if (size > 1) {
2472     PetscBool havedata = (PetscBool)!!data;
2473 
2474     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
2475     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
2476     ierr = MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm);CHKERRMPI(ierr);
2477     if (havedata) {  /* user provided data array, so no need to assemble */
2478       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
2479       (*A)->assembled = PETSC_TRUE;
2480     }
2481   } else {
2482     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
2483     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
2484   }
2485   PetscFunctionReturn(0);
2486 }
2487 
2488 #if defined(PETSC_HAVE_CUDA)
2489 /*@C
2490    MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.
2491 
2492    Collective
2493 
2494    Input Parameters:
2495 +  comm - MPI communicator
2496 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2497 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2498 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2499 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2500 -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
2501    to control matrix memory allocation.
2502 
2503    Output Parameter:
2504 .  A - the matrix
2505 
2506    Notes:
2507 
2508    Level: intermediate
2509 
2510 .seealso: MatCreate(), MatCreateDense()
2511 @*/
PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  /* all ranks must agree on whether a user GPU buffer was supplied; checked in
     debug builds only. NOTE(review): unlike MatCreateDense(), no Allreduce is
     done here, so in optimized builds a rank-inconsistent `data` would go
     undetected -- confirm this is intended */
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
    ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    /* single-process communicator: use the sequential CUDA dense type */
    ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
    ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2535 #endif
2536 
2537 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
2538 {
2539   Mat            mat;
2540   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
2541   PetscErrorCode ierr;
2542 
2543   PetscFunctionBegin;
2544   *newmat = NULL;
2545   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
2546   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2547   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
2548   a       = (Mat_MPIDense*)mat->data;
2549 
2550   mat->factortype   = A->factortype;
2551   mat->assembled    = PETSC_TRUE;
2552   mat->preallocated = PETSC_TRUE;
2553 
2554   mat->insertmode = NOT_SET_VALUES;
2555   a->donotstash   = oldmat->donotstash;
2556 
2557   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
2558   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
2559 
2560   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2561   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2562   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
2563 
2564   *newmat = mat;
2565   PetscFunctionReturn(0);
2566 }
2567 
/*
   Loads a parallel dense matrix from a viewer. Binary viewers and (when
   PETSc is configured with HDF5) HDF5 viewers are supported; any other
   viewer type raises PETSC_ERR_SUP.
*/
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}
2594 
2595 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
2596 {
2597   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2598   Mat            a,b;
2599   PetscBool      flg;
2600   PetscErrorCode ierr;
2601 
2602   PetscFunctionBegin;
2603   a    = matA->A;
2604   b    = matB->A;
2605   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2606   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2607   PetscFunctionReturn(0);
2608 }
2609 
2610 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2611 {
2612   PetscErrorCode        ierr;
2613   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2614 
2615   PetscFunctionBegin;
2616   ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
2617   ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
2618   ierr = PetscFree(atb);CHKERRQ(ierr);
2619   PetscFunctionReturn(0);
2620 }
2621 
2622 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2623 {
2624   PetscErrorCode        ierr;
2625   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2626 
2627   PetscFunctionBegin;
2628   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
2629   ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
2630   ierr = PetscFree(abt);CHKERRQ(ierr);
2631   PetscFunctionReturn(0);
2632 }
2633 
/*
   Numeric phase of C = A^T * B for parallel dense A, B.

   Each rank forms the full-size local product atb = A_loc^T * B_loc (its
   additive contribution to all of C), packs it into sendbuf grouped by
   destination rank, and an MPI_Reduce_scatter sums the contributions and
   delivers each rank its own rows of C.
*/
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts;
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  atb = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf = atb->sendbuf;

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);

  /* compute atbarray = aseq^T * bseq (reusing the cached product after the first call) */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);

  /* arrange atbarray into sendbuf: for each destination rank, its rows of every
     column are placed contiguously, matching the Reduce_scatter recvcounts set
     up in the symbolic phase. Indexing assumes atb->atb has leading dimension
     cM -- NOTE(review): confirm MatTransposeMatMult always yields a tight lda */
  ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);

  /* sum all atbarray to local values of C */
  ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRMPI(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2678 
/*
   Symbolic phase of C = A^T * B for parallel dense matrices: sets the sizes
   and (dense) type of C and builds the reuse context consumed by
   MatTransposeMatMultNumeric_MPIDense_MPIDense -- a send buffer large enough
   for the full global product plus the per-rank Reduce_scatter counts.
*/
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool             cisdense;
  PetscInt              i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* A and B must share the same row distribution since the product contracts over rows */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product C */
  ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
  if (!cisdense) {
    ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
  }
  ierr = MatSetUp(C);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = C->rmap->N;
  /* sendbuf holds an entire cM x cN contribution; size_t casts avoid PetscInt overflow */
  ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  /* rank i receives its (ranges[i+1]-ranges[i]) rows for each of the cN columns */
  for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2718 
/*
   Symbolic phase of C = A * B^T for parallel dense matrices: sets the sizes
   and type of C, picks the communication algorithm ("allgatherv" or
   "cyclic") from the product options, and allocates the buffers the numeric
   phase reuses on every call.
*/
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A and B */
  if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);

  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  alg  = flg ? 0 : 1;   /* 0: allgatherv, 1: cyclic */

  /* setup matrix product C */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  /* a dedicated tag keeps the cyclic ring messages from clashing with other traffic */
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    /* two ping-pong buffers sized for the largest block of B rows in the ring */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" */
    /* buf[0]: packed local block of B; buf[1]: space for the fully gathered B */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data    = abt;
  C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2771 
/*
   Numeric phase of C = A * B^T using a ring (cyclic) exchange: on each of
   the `size` steps a rank multiplies its local A block against the rows of
   B currently held in sendbuf (initially its own) while the next block is
   being received in the background; after all steps every column block of
   the local part of C has been filled.
*/
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=NULL, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda = 0, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  = (Mat_MatTransMultDense*)C->product->data;
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
  /* capture the leading dimensions of the three local blocks for the BLAS call */
  ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  /* the exchanged buffer must be contiguous (lda == local rows); if the local
     B block already is, send it in place, otherwise pack it into buf[0] */
  if (blda == bn) {
    sendbuf = (PetscScalar*)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  /* ring topology: send to the previous rank, receive from the next one */
  if (size > 1) {
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* the previous send/receive has completed, so both buffers may be reused */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];  /* alternate buffers each round */
      ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRMPI(ierr);
      ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRMPI(ierr);
    }

    /* local aseq * sendbuf^T, overlapped with the communication started above */
    ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2857 
2858 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2859 {
2860   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2861   Mat_MatTransMultDense *abt;
2862   PetscErrorCode        ierr;
2863   MPI_Comm              comm;
2864   PetscMPIInt           size;
2865   PetscScalar           *cv, *sendbuf, *recvbuf;
2866   const PetscScalar     *av,*bv;
2867   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2868   PetscScalar           _DOne=1.0,_DZero=0.0;
2869   PetscBLASInt          cm, cn, ck, alda, clda;
2870 
2871   PetscFunctionBegin;
2872   MatCheckProduct(C,3);
2873   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2874   abt  = (Mat_MatTransMultDense*)C->product->data;
2875   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2876   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
2877   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
2878   ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
2879   ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
2880   ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
2881   ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
2882   ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr);
2883   ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
2884   ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
2885   /* copy transpose of B into buf[0] */
2886   bn      = B->rmap->n;
2887   sendbuf = abt->buf[0];
2888   recvbuf = abt->buf[1];
2889   for (k = 0, j = 0; j < bn; j++) {
2890     for (i = 0; i < cK; i++, k++) {
2891       sendbuf[k] = bv[i * blda + j];
2892     }
2893   }
2894   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2895   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRMPI(ierr);
2896   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2897   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2898   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
2899   if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2900   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
2901   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2902   ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
2903   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2904   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2905   PetscFunctionReturn(0);
2906 }
2907 
2908 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2909 {
2910   Mat_MatTransMultDense *abt;
2911   PetscErrorCode        ierr;
2912 
2913   PetscFunctionBegin;
2914   MatCheckProduct(C,3);
2915   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2916   abt = (Mat_MatTransMultDense*)C->product->data;
2917   switch (abt->alg) {
2918   case 1:
2919     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2920     break;
2921   default:
2922     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2923     break;
2924   }
2925   PetscFunctionReturn(0);
2926 }
2927 
2928 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2929 {
2930   PetscErrorCode   ierr;
2931   Mat_MatMultDense *ab = (Mat_MatMultDense*)data;
2932 
2933   PetscFunctionBegin;
2934   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2935   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2936   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2937   ierr = PetscFree(ab);CHKERRQ(ierr);
2938   PetscFunctionReturn(0);
2939 }
2940 
2941 #if defined(PETSC_HAVE_ELEMENTAL)
2942 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2943 {
2944   PetscErrorCode   ierr;
2945   Mat_MatMultDense *ab;
2946 
2947   PetscFunctionBegin;
2948   MatCheckProduct(C,3);
2949   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
2950   ab   = (Mat_MatMultDense*)C->product->data;
2951   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
2952   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
2953   ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
2954   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
2955   PetscFunctionReturn(0);
2956 }
2957 
2958 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2959 {
2960   PetscErrorCode   ierr;
2961   Mat              Ae,Be,Ce;
2962   Mat_MatMultDense *ab;
2963 
2964   PetscFunctionBegin;
2965   MatCheckProduct(C,4);
2966   if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2967   /* check local size of A and B */
2968   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2969     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2970   }
2971 
2972   /* create elemental matrices Ae and Be */
2973   ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr);
2974   ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2975   ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr);
2976   ierr = MatSetUp(Ae);CHKERRQ(ierr);
2977   ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2978 
2979   ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr);
2980   ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
2981   ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
2982   ierr = MatSetUp(Be);CHKERRQ(ierr);
2983   ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2984 
2985   /* compute symbolic Ce = Ae*Be */
2986   ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr);
2987   ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr);
2988 
2989   /* setup C */
2990   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2991   ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr);
2992   ierr = MatSetUp(C);CHKERRQ(ierr);
2993 
2994   /* create data structure for reuse Cdense */
2995   ierr = PetscNew(&ab);CHKERRQ(ierr);
2996   ab->Ae = Ae;
2997   ab->Be = Be;
2998   ab->Ce = Ce;
2999 
3000   C->product->data    = ab;
3001   C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
3002   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
3003   PetscFunctionReturn(0);
3004 }
3005 #endif
3006 /* ----------------------------------------------- */
3007 #if defined(PETSC_HAVE_ELEMENTAL)
3008 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
3009 {
3010   PetscFunctionBegin;
3011   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
3012   C->ops->productsymbolic = MatProductSymbolic_AB;
3013   PetscFunctionReturn(0);
3014 }
3015 #endif
3016 
3017 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
3018 {
3019   Mat_Product *product = C->product;
3020   Mat         A = product->A,B=product->B;
3021 
3022   PetscFunctionBegin;
3023   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
3024     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
3025   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
3026   C->ops->productsymbolic = MatProductSymbolic_AtB;
3027   PetscFunctionReturn(0);
3028 }
3029 
3030 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
3031 {
3032   PetscErrorCode ierr;
3033   Mat_Product    *product = C->product;
3034   const char     *algTypes[2] = {"allgatherv","cyclic"};
3035   PetscInt       alg,nalg = 2;
3036   PetscBool      flg = PETSC_FALSE;
3037 
3038   PetscFunctionBegin;
3039   /* Set default algorithm */
3040   alg = 0; /* default is allgatherv */
3041   ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
3042   if (flg) {
3043     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
3044   }
3045 
3046   /* Get runtime option */
3047   if (product->api_user) {
3048     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
3049     ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
3050     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3051   } else {
3052     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
3053     ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
3054     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3055   }
3056   if (flg) {
3057     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
3058   }
3059 
3060   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
3061   C->ops->productsymbolic          = MatProductSymbolic_ABt;
3062   PetscFunctionReturn(0);
3063 }
3064 
3065 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
3066 {
3067   PetscErrorCode ierr;
3068   Mat_Product    *product = C->product;
3069 
3070   PetscFunctionBegin;
3071   switch (product->type) {
3072 #if defined(PETSC_HAVE_ELEMENTAL)
3073   case MATPRODUCT_AB:
3074     ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
3075     break;
3076 #endif
3077   case MATPRODUCT_AtB:
3078     ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
3079     break;
3080   case MATPRODUCT_ABt:
3081     ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
3082     break;
3083   default:
3084     break;
3085   }
3086   PetscFunctionReturn(0);
3087 }
3088