xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision daccdfde92c11ef6feb5e32fb20a0d0151d40d7f)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11 
12       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
13               matrix that represents the operator. For sequential matrices it returns itself.
14 
15     Input Parameter:
16 .      A - the Seq or MPI dense matrix
17 
18     Output Parameter:
19 .      B - the inner matrix
20 
21     Level: intermediate
22 
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25 {
26   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
27   PetscErrorCode ierr;
28   PetscBool      flg;
29 
30   PetscFunctionBegin;
31   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
32   PetscValidPointer(B,2);
33   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
34   if (flg) *B = mat->A;
35   else *B = A;
36   PetscFunctionReturn(0);
37 }
38 
39 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
40 {
41   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
42   PetscErrorCode ierr;
43   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
44 
45   PetscFunctionBegin;
46   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
47   lrow = row - rstart;
48   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 
52 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
53 {
54   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
55   PetscErrorCode ierr;
56   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
57 
58   PetscFunctionBegin;
59   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
60   lrow = row - rstart;
61   ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
62   PetscFunctionReturn(0);
63 }
64 
/* Return the square on-diagonal block of the parallel dense matrix as a
   sequential dense matrix whose storage ALIASES (does not copy) the local
   block of A.  The block is cached on A as the composed object
   "DiagonalBlock", so repeated calls are cheap and return the same Mat. */
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  /* Congruent row/column layouts are required so that the diagonal block is m x m */
  ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    /* CUDA local blocks are not handled by the host-pointer aliasing below */
    ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr);
    if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    /* Alias the diagonal block: column rstart of the local block starts at
       array + m*rstart in column-major storage.
       NOTE(review): this assumes the local block's leading dimension equals m;
       confirm lda == m whenever this path is reached (MatDenseSetLDA could break it) */
    ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* Compose takes a reference; drop ours so A owns the cached block */
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
98 
/* Insert/add a logically dense block of values.  Locally owned rows go
   straight into the sequential block; off-process rows are stashed and
   communicated later during MatAssemblyBegin/End (unless donotstash). */
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are silently skipped */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v is row-major: row i of the input block is contiguous at v+i*n */
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* v is column-major: element (i,j) of the input block lives at v[i+j*m] */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue; /* negative column indices are silently skipped */
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: queue it for the assembly communication phase */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
132 
133 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
134 {
135   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
136   PetscErrorCode ierr;
137   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
138 
139   PetscFunctionBegin;
140   for (i=0; i<m; i++) {
141     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
142     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
143     if (idxm[i] >= rstart && idxm[i] < rend) {
144       row = idxm[i] - rstart;
145       for (j=0; j<n; j++) {
146         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
147         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
148         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
149       }
150     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
156 {
157   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
158   PetscErrorCode ierr;
159 
160   PetscFunctionBegin;
161   ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
162   PetscFunctionReturn(0);
163 }
164 
165 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
166 {
167   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
168   PetscErrorCode ierr;
169 
170   PetscFunctionBegin;
171   ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr);
172   PetscFunctionReturn(0);
173 }
174 
175 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
176 {
177   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
178   PetscErrorCode ierr;
179 
180   PetscFunctionBegin;
181   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
182   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
183   PetscFunctionReturn(0);
184 }
185 
186 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
187 {
188   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
189   PetscErrorCode ierr;
190 
191   PetscFunctionBegin;
192   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
193   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
194   PetscFunctionReturn(0);
195 }
196 
197 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
198 {
199   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
200   PetscErrorCode ierr;
201 
202   PetscFunctionBegin;
203   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
204   ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr);
205   PetscFunctionReturn(0);
206 }
207 
208 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
209 {
210   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
211   PetscErrorCode ierr;
212 
213   PetscFunctionBegin;
214   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
215   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
216   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
217   PetscFunctionReturn(0);
218 }
219 
220 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
221 {
222   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
223   PetscErrorCode ierr;
224 
225   PetscFunctionBegin;
226   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
227   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
228   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
229   PetscFunctionReturn(0);
230 }
231 
232 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
233 {
234   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
235   PetscErrorCode ierr;
236 
237   PetscFunctionBegin;
238   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
239   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
240   ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr);
241   PetscFunctionReturn(0);
242 }
243 
/* Extract the submatrix A(isrow,iscol).  The column IS is gathered onto all
   ranks (every rank copies all selected columns of its own rows), while the
   rows must already match the ownership layout of A — no redistribution. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
  PetscErrorCode    ierr;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  /* every rank needs the full list of selected (global) columns */
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time.
     NOTE(review): bv is advanced assuming the target's leading dimension
     equals nrows (contiguous columns); confirm this holds for reused matrices */
  newmatd = (Mat_MPIDense*)newmat->data;
  ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr);
  for (i=0; i<Ncols; i++) {
    /* av points at the start (column-major) of source column icol[i] */
    const PetscScalar *av = v + lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }
  ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr);

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
313 
314 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
315 {
316   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
317   PetscErrorCode ierr;
318 
319   PetscFunctionBegin;
320   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
321   PetscFunctionReturn(0);
322 }
323 
324 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
325 {
326   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
327   PetscErrorCode ierr;
328 
329   PetscFunctionBegin;
330   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
331   PetscFunctionReturn(0);
332 }
333 
334 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
335 {
336   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
337   PetscErrorCode ierr;
338 
339   PetscFunctionBegin;
340   ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr);
341   PetscFunctionReturn(0);
342 }
343 
344 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
345 {
346   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
347   PetscErrorCode ierr;
348   PetscInt       nstash,reallocs;
349 
350   PetscFunctionBegin;
351   if (mdn->donotstash || mat->nooffprocentries) return(0);
352 
353   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
354   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
355   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
356   PetscFunctionReturn(0);
357 }
358 
/* Finish assembling: drain the stash messages started by MatAssemblyBegin,
   insert the received off-process values, then assemble the local block and
   (on first final assembly) build the multiply scatter context. */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      /* flg == 0 means no more incoming stash messages */
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  /* assemble the sequential block holding this rank's rows */
  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* first final assembly: set up the scatter (lvec/Mvctx) used by MatMult */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
397 
398 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
399 {
400   PetscErrorCode ierr;
401   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
402 
403   PetscFunctionBegin;
404   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
405   PetscFunctionReturn(0);
406 }
407 
/* Zero the given (global) rows, optionally place diag on the diagonal of each
   zeroed row, and optionally fix up the right-hand side b so that the zeroed
   rows satisfy b = diag*x. */
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr);
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr);
  }
  /* zero the rows in the local block; the diagonal is set afterwards */
  ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  /* NOTE: MatDiagonalSet is collective; this relies on diag having the same
     value on every rank so all ranks take (or skip) this branch together */
  if (diag != 0.0) {
    Vec d;

    ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
    ierr = VecSet(d,diag);CHKERRQ(ierr);
    ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecDestroy(&d);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
440 
441 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
442 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
443 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
444 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
445 
446 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
447 {
448   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
449   PetscErrorCode    ierr;
450   const PetscScalar *ax;
451   PetscScalar       *ay;
452 
453   PetscFunctionBegin;
454   ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
455   ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
456   ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
457   ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
458   ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
459   ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
460   ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
461   PetscFunctionReturn(0);
462 }
463 
464 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
465 {
466   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
467   PetscErrorCode    ierr;
468   const PetscScalar *ax;
469   PetscScalar       *ay;
470 
471   PetscFunctionBegin;
472   ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
473   ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
474   ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
475   ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
476   ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
477   ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
478   ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
479   PetscFunctionReturn(0);
480 }
481 
482 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
483 {
484   Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
485   PetscErrorCode    ierr;
486   const PetscScalar *ax;
487   PetscScalar       *ay;
488 
489   PetscFunctionBegin;
490   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
491   ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
492   ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
493   ierr = VecGetArrayInPlace(yy,&ay);CHKERRQ(ierr);
494   ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
495   ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
496   ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
497   ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr);
498   PetscFunctionReturn(0);
499 }
500 
501 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
502 {
503   Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
504   PetscErrorCode    ierr;
505   const PetscScalar *ax;
506   PetscScalar       *ay;
507 
508   PetscFunctionBegin;
509   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
510   ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
511   ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
512   ierr = VecGetArrayInPlace(zz,&ay);CHKERRQ(ierr);
513   ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
514   ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
515   ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
516   ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr);
517   PetscFunctionReturn(0);
518 }
519 
520 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
521 {
522   Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
523   PetscErrorCode    ierr;
524   PetscInt          lda,len,i,n,m = A->rmap->n,radd;
525   PetscScalar       *x,zero = 0.0;
526   const PetscScalar *av;
527 
528   PetscFunctionBegin;
529   ierr = VecSet(v,zero);CHKERRQ(ierr);
530   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
531   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
532   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
533   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
534   radd = A->rmap->rstart*m;
535   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
536   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
537   for (i=0; i<len; i++) {
538     x[i] = av[radd + i*lda + i];
539   }
540   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
541   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
542   PetscFunctionReturn(0);
543 }
544 
/* Destroy the parallel dense matrix: release the local block, the multiply
   work vector / star-forest, cached column-vector and submatrix views, the
   stash, and unregister every composed method installed at creation time. */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  /* refuse to destroy while a view into the storage is still outstanding */
  if (mdn->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (mdn->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  /* clear every composed method so the object can be retyped cleanly */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
#if defined (PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
599 
600 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
601 
602 #include <petscdraw.h>
/* View helper for ASCII/draw/socket viewers.  INFO formats are printed
   per-rank; otherwise the whole matrix is gathered onto rank 0 and viewed
   there as a sequential dense matrix. */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscViewerType   vtype; /* NOTE(review): fetched below but never used */
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      /* plain INFO prints nothing extra for dense matrices */
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    /* rank 0 owns all M rows of the gathered copy; everyone else owns none */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* only rank 0 actually views; others participate in the sub-viewer calls */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
685 
686 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
687 {
688   PetscErrorCode ierr;
689   PetscBool      iascii,isbinary,isdraw,issocket;
690 
691   PetscFunctionBegin;
692   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
693   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
694   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
695   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
696 
697   if (iascii || issocket || isdraw) {
698     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
699   } else if (isbinary) {
700     ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
701   }
702   PetscFunctionReturn(0);
703 }
704 
705 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
706 {
707   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
708   Mat            mdn  = mat->A;
709   PetscErrorCode ierr;
710   PetscLogDouble isend[5],irecv[5];
711 
712   PetscFunctionBegin;
713   info->block_size = 1.0;
714 
715   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
716 
717   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
718   isend[3] = info->memory;  isend[4] = info->mallocs;
719   if (flag == MAT_LOCAL) {
720     info->nz_used      = isend[0];
721     info->nz_allocated = isend[1];
722     info->nz_unneeded  = isend[2];
723     info->memory       = isend[3];
724     info->mallocs      = isend[4];
725   } else if (flag == MAT_GLOBAL_MAX) {
726     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
727 
728     info->nz_used      = irecv[0];
729     info->nz_allocated = irecv[1];
730     info->nz_unneeded  = irecv[2];
731     info->memory       = irecv[3];
732     info->mallocs      = irecv[4];
733   } else if (flag == MAT_GLOBAL_SUM) {
734     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
735 
736     info->nz_used      = irecv[0];
737     info->nz_allocated = irecv[1];
738     info->nz_unneeded  = irecv[2];
739     info->memory       = irecv[3];
740     info->mallocs      = irecv[4];
741   }
742   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
743   info->fill_ratio_needed = 0;
744   info->factor_mallocs    = 0;
745   PetscFunctionReturn(0);
746 }
747 
748 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
749 {
750   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
751   PetscErrorCode ierr;
752 
753   PetscFunctionBegin;
754   switch (op) {
755   case MAT_NEW_NONZERO_LOCATIONS:
756   case MAT_NEW_NONZERO_LOCATION_ERR:
757   case MAT_NEW_NONZERO_ALLOCATION_ERR:
758     MatCheckPreallocated(A,1);
759     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
760     break;
761   case MAT_ROW_ORIENTED:
762     MatCheckPreallocated(A,1);
763     a->roworiented = flg;
764     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
765     break;
766   case MAT_NEW_DIAGONALS:
767   case MAT_KEEP_NONZERO_PATTERN:
768   case MAT_USE_HASH_TABLE:
769   case MAT_SORTED_FULL:
770     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
771     break;
772   case MAT_IGNORE_OFF_PROC_ENTRIES:
773     a->donotstash = flg;
774     break;
775   case MAT_SYMMETRIC:
776   case MAT_STRUCTURALLY_SYMMETRIC:
777   case MAT_HERMITIAN:
778   case MAT_SYMMETRY_ETERNAL:
779   case MAT_IGNORE_LOWER_TRIANGULAR:
780   case MAT_IGNORE_ZERO_ENTRIES:
781     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
782     break;
783   default:
784     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
785   }
786   PetscFunctionReturn(0);
787 }
788 
789 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
790 {
791   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
792   const PetscScalar *l;
793   PetscScalar       x,*v,*vv,*r;
794   PetscErrorCode    ierr;
795   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;
796 
797   PetscFunctionBegin;
798   ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
799   ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
800   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
801   if (ll) {
802     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
803     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2);
804     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
805     for (i=0; i<m; i++) {
806       x = l[i];
807       v = vv + i;
808       for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
809     }
810     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
811     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
812   }
813   if (rr) {
814     const PetscScalar *ar;
815 
816     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
817     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
818     ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
819     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
820     ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
821     ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
822     ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr);
823     for (i=0; i<n; i++) {
824       x = r[i];
825       v = vv + i*lda;
826       for (j=0; j<m; j++) (*v++) *= x;
827     }
828     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
829     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
830   }
831   ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr);
832   PetscFunctionReturn(0);
833 }
834 
835 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
836 {
837   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
838   PetscErrorCode    ierr;
839   PetscInt          i,j;
840   PetscMPIInt       size;
841   PetscReal         sum = 0.0;
842   const PetscScalar *av,*v;
843 
844   PetscFunctionBegin;
845   ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr);
846   v    = av;
847   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
848   if (size == 1) {
849     ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
850   } else {
851     if (type == NORM_FROBENIUS) {
852       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
853         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
854       }
855       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
856       *nrm = PetscSqrtReal(*nrm);
857       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
858     } else if (type == NORM_1) {
859       PetscReal *tmp,*tmp2;
860       ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
861       *nrm = 0.0;
862       v    = av;
863       for (j=0; j<mdn->A->cmap->n; j++) {
864         for (i=0; i<mdn->A->rmap->n; i++) {
865           tmp[j] += PetscAbsScalar(*v);  v++;
866         }
867       }
868       ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
869       for (j=0; j<A->cmap->N; j++) {
870         if (tmp2[j] > *nrm) *nrm = tmp2[j];
871       }
872       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
873       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
874     } else if (type == NORM_INFINITY) { /* max row norm */
875       PetscReal ntemp;
876       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
877       ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
878     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
879   }
880   ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
881   PetscFunctionReturn(0);
882 }
883 
884 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
885 {
886   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
887   Mat            B;
888   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
889   PetscErrorCode ierr;
890   PetscInt       j,i,lda;
891   PetscScalar    *v;
892 
893   PetscFunctionBegin;
894   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
895     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
896     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
897     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
898     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
899   } else B = *matout;
900 
901   m    = a->A->rmap->n; n = a->A->cmap->n;
902   ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
903   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
904   ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
905   for (i=0; i<m; i++) rwork[i] = rstart + i;
906   for (j=0; j<n; j++) {
907     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
908     v   += lda;
909   }
910   ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
911   ierr = PetscFree(rwork);CHKERRQ(ierr);
912   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
913   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
914   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
915     *matout = B;
916   } else {
917     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
918   }
919   PetscFunctionReturn(0);
920 }
921 
922 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
923 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
924 
925 PetscErrorCode MatSetUp_MPIDense(Mat A)
926 {
927   PetscErrorCode ierr;
928 
929   PetscFunctionBegin;
930   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
931   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
932   if (!A->preallocated) {
933     ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
934   }
935   PetscFunctionReturn(0);
936 }
937 
938 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
939 {
940   PetscErrorCode ierr;
941   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
942 
943   PetscFunctionBegin;
944   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
945   PetscFunctionReturn(0);
946 }
947 
948 PetscErrorCode MatConjugate_MPIDense(Mat mat)
949 {
950   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
951   PetscErrorCode ierr;
952 
953   PetscFunctionBegin;
954   ierr = MatConjugate(a->A);CHKERRQ(ierr);
955   PetscFunctionReturn(0);
956 }
957 
958 PetscErrorCode MatRealPart_MPIDense(Mat A)
959 {
960   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
961   PetscErrorCode ierr;
962 
963   PetscFunctionBegin;
964   ierr = MatRealPart(a->A);CHKERRQ(ierr);
965   PetscFunctionReturn(0);
966 }
967 
968 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
969 {
970   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
971   PetscErrorCode ierr;
972 
973   PetscFunctionBegin;
974   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
975   PetscFunctionReturn(0);
976 }
977 
978 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
979 {
980   PetscErrorCode ierr;
981   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
982 
983   PetscFunctionBegin;
984   if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
985   if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
986   ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
987   PetscFunctionReturn(0);
988 }
989 
990 PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
991 
992 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
993 {
994   PetscErrorCode ierr;
995   PetscInt       i,n;
996   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
997   PetscReal      *work;
998 
999   PetscFunctionBegin;
1000   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1001   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1002   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1003   if (type == NORM_2) {
1004     for (i=0; i<n; i++) work[i] *= work[i];
1005   }
1006   if (type == NORM_INFINITY) {
1007     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1008   } else {
1009     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1010   }
1011   ierr = PetscFree(work);CHKERRQ(ierr);
1012   if (type == NORM_2) {
1013     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1014   }
1015   PetscFunctionReturn(0);
1016 }
1017 
1018 #if defined(PETSC_HAVE_CUDA)
/*
   Returns in *v a borrowed MPI CUDA vector that aliases column 'col' of the
   matrix' device storage (read/write access).  Must be matched by a call to
   MatDenseRestoreColumnVec(); only one column view may be live at a time.
*/
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* the work vector is created lazily without storage and reused for every column */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* col+1 so that column 0 still registers as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  /* point the work vector at column col of the device array */
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1039 
1040 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1041 {
1042   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1043   PetscErrorCode ierr;
1044 
1045   PetscFunctionBegin;
1046   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1047   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1048   a->vecinuse = 0;
1049   ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1050   ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
1051   *v   = NULL;
1052   PetscFunctionReturn(0);
1053 }
1054 
/*
   Read-only variant of MatDenseGetColumnVec for the CUDA type: returns a
   borrowed, read-locked vector aliasing column 'col' of the device storage.
   Must be matched by MatDenseRestoreColumnVecRead().
*/
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily created, storage-less work vector shared by all column accesses */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* col+1 so column 0 still registers as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  /* lock the vector so callers cannot write through a read-only view */
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1076 
/*
   Releases the read-only column view from MatDenseGetColumnVecRead():
   restores the device array, pops the read lock, and detaches the work
   vector from the matrix storage.
*/
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1092 
/*
   Write-only variant of MatDenseGetColumnVec for the CUDA type: returns a
   borrowed vector aliasing column 'col' of the device storage; prior
   contents of the column need not be valid.  Must be matched by
   MatDenseRestoreColumnVecWrite().
*/
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily created, storage-less work vector shared by all column accesses */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* col+1 so column 0 still registers as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1113 
/*
   Releases the write-only column view from MatDenseGetColumnVecWrite():
   restores the device array and detaches the work vector from the matrix
   storage.
*/
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1128 
1129 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1130 {
1131   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1132   PetscErrorCode ierr;
1133 
1134   PetscFunctionBegin;
1135   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1136   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1137   ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
1138   PetscFunctionReturn(0);
1139 }
1140 
1141 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1142 {
1143   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1144   PetscErrorCode ierr;
1145 
1146   PetscFunctionBegin;
1147   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1148   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1149   ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
1150   PetscFunctionReturn(0);
1151 }
1152 
1153 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1154 {
1155   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1156   PetscErrorCode ierr;
1157 
1158   PetscFunctionBegin;
1159   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1160   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1161   ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr);
1162   PetscFunctionReturn(0);
1163 }
1164 
1165 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1166 {
1167   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1168   PetscErrorCode ierr;
1169 
1170   PetscFunctionBegin;
1171   ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
1172   PetscFunctionReturn(0);
1173 }
1174 
1175 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1176 {
1177   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1178   PetscErrorCode ierr;
1179 
1180   PetscFunctionBegin;
1181   ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
1182   PetscFunctionReturn(0);
1183 }
1184 
1185 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1186 {
1187   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1188   PetscErrorCode ierr;
1189 
1190   PetscFunctionBegin;
1191   ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1196 {
1197   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1198   PetscErrorCode ierr;
1199 
1200   PetscFunctionBegin;
1201   ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1206 {
1207   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1208   PetscErrorCode ierr;
1209 
1210   PetscFunctionBegin;
1211   ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
1212   PetscFunctionReturn(0);
1213 }
1214 
1215 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1216 {
1217   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1218   PetscErrorCode ierr;
1219 
1220   PetscFunctionBegin;
1221   ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
1222   PetscFunctionReturn(0);
1223 }
1224 
1225 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1226 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1227 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
1228 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1229 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1230 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
1231 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);
1232 
/*
   Pins the matrix to (or releases it from) the CPU.  The bind flag is
   propagated to the local block and the cached diagonal-block matrix, and
   the column-vector access methods are re-composed so that subsequent
   MatDenseGetColumnVec* calls pick the CUDA or host implementation.
*/
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* switching implementations while a borrowed view is live would leak it */
  if (d->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (d->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* drop cached work objects that are not CUDA-capable so they get
       re-created with the right type on next use */
    ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
    }
    ierr = PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = MatDestroy(&d->cmat);CHKERRQ(ierr);
    }
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
  } else {
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  }
  if (d->cmat) {
    ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1275 
/*
   Preallocates the device storage of a MATMPIDENSECUDA matrix.

   d_data, when non-NULL, is adopted as the storage of the local block;
   when NULL the block allocates its own device array.  Quietly does
   nothing when A is not of type MATMPIDENSECUDA.  Creates the local
   sequential block on first use; note it is sized rmap->n x cmap->N,
   i.e. each rank stores all columns of its row slice.
*/
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  if (!iscuda) PetscFunctionReturn(0);
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!d->A) {
    ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
    ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
  }
  ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
  ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1298 #endif
1299 
1300 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1301 {
1302   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1303   PetscErrorCode ierr;
1304 
1305   PetscFunctionBegin;
1306   ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
/*
   A dense matrix stores every entry, so a diagonal element can never be
   structurally missing; always reports PETSC_FALSE and leaves d untouched.
*/
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1316 
1317 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1318 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1319 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1320 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1321 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
1322 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);
1323 
1324 /* -------------------------------------------------------------------*/
/* Operation table installed on every MATMPIDENSE at creation time.  The
   numbered comments give the slot index within struct _MatOps; a 0 entry
   means the operation is not implemented for this matrix type. */
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 89*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        0,
                                /* 99*/ MatProductSetFromOptions_MPIDense,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        0,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                /*145*/ 0,
                                        0,
                                        0
};
1474 
1475 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1476 {
1477   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1478   PetscBool      iscuda;
1479   PetscErrorCode ierr;
1480 
1481   PetscFunctionBegin;
1482   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1483   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1484   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1485   if (!a->A) {
1486     ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1487     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1488     ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1489   }
1490   ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
1491   ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
1492   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1493   mat->preallocated = PETSC_TRUE;
1494   PetscFunctionReturn(0);
1495 }
1496 
#if defined(PETSC_HAVE_ELEMENTAL)
/*
   MatConvert_MPIDense_Elemental - converts an MPI dense matrix to MATELEMENTAL.

   Every locally owned row of A is inserted into the Elemental matrix with
   ADD_VALUES; for MAT_REUSE_MATRIX the target is zeroed first so the result
   equals A.  MAT_INPLACE_MATRIX replaces A's header with the new matrix.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat               mat_elemental;
  PetscErrorCode    ierr;
  const PetscScalar *v;
  PetscInt          m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;          /* all global columns */
  for (i=0; i<m; i++) rows[i] = rstart + i; /* locally owned global rows */

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  /* Bug fix: A is only read here, so use the read-only accessor instead of
     MatDenseGetArray(); the write accessor needlessly bumps A's object state
     and marks any device copy of A out of date. */
  ierr = MatDenseGetArrayRead(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif
1537 
1538 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1539 {
1540   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1541   PetscErrorCode ierr;
1542 
1543   PetscFunctionBegin;
1544   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1545   PetscFunctionReturn(0);
1546 }
1547 
1548 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1549 {
1550   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1551   PetscErrorCode ierr;
1552 
1553   PetscFunctionBegin;
1554   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1555   PetscFunctionReturn(0);
1556 }
1557 
1558 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1559 {
1560   PetscErrorCode ierr;
1561   Mat_MPIDense   *mat;
1562   PetscInt       m,nloc,N;
1563 
1564   PetscFunctionBegin;
1565   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
1566   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
1567   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1568     PetscInt sum;
1569 
1570     if (n == PETSC_DECIDE) {
1571       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1572     }
1573     /* Check sum(n) = N */
1574     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1575     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1576 
1577     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
1578   }
1579 
1580   /* numeric phase */
1581   mat = (Mat_MPIDense*)(*outmat)->data;
1582   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1583   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1584   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1585   PetscFunctionReturn(0);
1586 }
1587 
#if defined(PETSC_HAVE_CUDA)
/* Convert a MATMPIDENSECUDA matrix to plain (host-only) MATMPIDENSE.

   MAT_INITIAL_MATRIX duplicates M; MAT_REUSE_MATRIX copies values into *newmat;
   otherwise *newmat is assumed to already reference the matrix to modify in place
   (presumably MAT_INPLACE_MATRIX with newmat == &M -- TODO confirm with callers).
   The CUDA-specific composed methods installed by MatConvert_MPIDense_MPIDenseCUDA()
   are removed and the local block is converted back to MATSEQDENSE. */
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  B    = *newmat;
  /* switch to CPU execution and standard (host) vectors */
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
  /* unregister all CUDA-only composed methods (NULL removes a composition) */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* convert the local block to SEQDENSE and rebuild the structures used by MatMult */
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;          /* plain MPIDENSE has no CPU/GPU toggle */
  B->offloadmask    = PETSC_OFFLOAD_CPU; /* data now lives on the host only */
  PetscFunctionReturn(0);
}
1630 
/* Convert a MATMPIDENSE matrix to MATMPIDENSECUDA (GPU-capable storage).

   MAT_INITIAL_MATRIX duplicates M; MAT_REUSE_MATRIX copies values into *newmat;
   otherwise *newmat is assumed to already reference the matrix to modify in place.
   Installs the CUDA-specific composed methods (removed again by
   MatConvert_MPIDenseCUDA_MPIDense()) and converts the local block to
   MATSEQDENSECUDA. */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  B    = *newmat;
  /* default to CUDA vectors so MatCreateVecs() yields device-capable vectors */
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  /* register the CUDA-only composed methods */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* convert the local block to SEQDENSECUDA and rebuild the structures used by MatMult */
    ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
    B->offloadmask = PETSC_OFFLOAD_BOTH;        /* host and device copies both valid after the copy-convert */
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; /* no local block yet: nothing resides anywhere */
  }
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
#endif
1676 
/* Expose column col of A as a parallel Vec with read/write access.

   Enforces the exclusive-access protocol: only one column vector (a->vecinuse)
   or submatrix view (a->matinuse) may be outstanding at a time.  The returned
   Vec is an internal, reused object whose array is placed directly over the
   column's storage inside the local SeqDense block; it must be returned with
   MatDenseRestoreColumnVec(). */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the internal column vector; its array is supplied below via VecPlaceArray() */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* store col+1 so column 0 still reads as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  /* column col starts at offset col*lda in the column-major local block */
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1696 
/* Return the column vector obtained with MatDenseGetColumnVec_MPIDense():
   releases the local array, detaches it from the internal Vec, clears the
   in-use flag, and NULLs the caller's handle. */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1711 
/* Expose column col of A as a parallel Vec with read-only access.

   Same exclusive-access protocol as MatDenseGetColumnVec_MPIDense(), but the
   local array is obtained with the read-only accessor and the vector is
   read-locked (VecLockReadPush) so modifications through it error out. */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the internal column vector shared by all column-vec accessors */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* store col+1 so column 0 still reads as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  /* column col starts at offset col*lda in the column-major local block */
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1732 
1733 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1734 {
1735   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1736   PetscErrorCode ierr;
1737 
1738   PetscFunctionBegin;
1739   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1740   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1741   a->vecinuse = 0;
1742   ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1743   ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
1744   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1745   *v   = NULL;
1746   PetscFunctionReturn(0);
1747 }
1748 
/* Expose column col of A as a parallel Vec with write-only access.

   Same exclusive-access protocol as MatDenseGetColumnVec_MPIDense(); the local
   array is obtained with the write-only accessor (existing values need not be
   preserved by the underlying implementation). */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the internal column vector shared by all column-vec accessors */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* store col+1 so column 0 still reads as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  /* column col starts at offset col*lda in the column-major local block */
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1768 
1769 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1770 {
1771   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1772   PetscErrorCode ierr;
1773 
1774   PetscFunctionBegin;
1775   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1776   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1777   a->vecinuse = 0;
1778   ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1779   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1780   *v   = NULL;
1781   PetscFunctionReturn(0);
1782 }
1783 
/* Expose columns [cbegin, cend) of A as a matrix sharing A's storage.

   The view matrix a->cmat is created lazily and reused across calls; its column
   layout is rebuilt whenever the requested width changes.  Only one submatrix
   view may be outstanding at a time (a->matinuse), and not while a column
   vector is outstanding (a->vecinuse).  Must be matched by
   MatDenseRestoreSubMatrix(). */
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscBool      setup = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (a->vecinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cmat) {
    /* first use: create the view matrix, sharing A's row layout by reference */
    setup = PETSC_TRUE;
    ierr = MatCreate(comm,&a->cmat);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr);
    ierr = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  } else if (cend-cbegin != a->cmat->cmap->N) {
    /* width changed since last use: rebuild only the column layout */
    setup = PETSC_TRUE;
    ierr = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  }
  c = (Mat_MPIDense*)a->cmat->data;
  if (c->A) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  /* borrow the column range of the local block as the view's local block */
  ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr);
  if (setup) { /* do we really need this? */
    ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr);
  }
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled = PETSC_TRUE;
  a->matinuse = cbegin + 1; /* store cbegin+1 so a view at column 0 still reads as "in use" */
  *v = a->cmat;
  PetscFunctionReturn(0);
}
1823 
/* Return the submatrix view obtained with MatDenseGetSubMatrix_MPIDense():
   verifies the caller is handing back the internal view, gives the borrowed
   local block back to a->A, clears the in-use flag, and NULLs the handle. */
PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
  if (!a->cmat) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
  if (*v != a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c    = (Mat_MPIDense*)a->cmat->data;
  ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1840 
/* Constructor for MATMPIDENSE: allocates the type-specific data structure,
   installs the function table, and composes all type-specific methods.
   The local block a->A is created later, in MatMPIDenseSetPreallocation(). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  /* compose the dense-specific methods resolved via PetscUseMethod/PetscTryMethod */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr);
  /* conversions to other packages/back-ends, compiled in only when available */
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#endif

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1901 
1902 /*MC
1903    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1904 
1905    Options Database Keys:
1906 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1907 
1908   Level: beginner
1909 
.seealso: MATMPIDENSE, MATSEQDENSECUDA, MATDENSECUDA
1911 
1912 M*/
#if defined(PETSC_HAVE_CUDA)
/* Constructor for MATMPIDENSECUDA: build a plain MPIDENSE matrix first, then
   convert it in place to the CUDA type (installs the CUDA method table). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
1924 
1925 /*MC
1926    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1927 
1928    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1929    and MATMPIDENSE otherwise.
1930 
1931    Options Database Keys:
1932 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1933 
1934   Level: beginner
1935 
1936 
1937 .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
1938 M*/
1939 
1940 /*MC
1941    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
1942 
1943    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
1944    and MATMPIDENSECUDA otherwise.
1945 
1946    Options Database Keys:
1947 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
1948 
1949   Level: beginner
1950 
1951 .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
1952 M*/
1953 
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   NOTE(review): the MPIDense implementation calls PetscLayoutSetUp() on the
   matrix communicator; confirm whether "Not collective" above is accurate.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation, if the type provides one */
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1985 
/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the placed array lives on the host, so the host copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
2019 
/*@
   MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode  MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation; errors if none is composed */
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2046 
/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), VecReplaceArray()
@*/
PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the replacement array lives on the host, so the host copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
2079 
#if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* the placed array lives on the device, so the GPU copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2111 
/*@C
   MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDenseCUDAPlaceArray()

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()

@*/
PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* restore the device array saved by the matching MatDenseCUDAPlaceArray() call */
  ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2138 
/*@C
   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   This permanently replaces the GPU array and frees the memory associated with the old GPU array.
   The memory passed in CANNOT be freed by the user. It will be freed
   when the matrix is destroyed. The array should respect the matrix leading dimension.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation; it takes ownership of array */
  ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* the new device array is now authoritative; the host copy is stale */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2170 
/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters:
.  array - the GPU array in column major order

   Notes:
   The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* write access: no host-to-device sync is performed by the implementation */
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2199 
/*@C
   MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:
   After this call the device copy of the matrix is considered up to date.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  /* the caller wrote through the device pointer, so the GPU copy is authoritative */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2226 
/*@C
   MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters:
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* read-only access: no object state increase, the matrix cannot be modified through a */
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2254 
2255 /*@C
2256    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2257 
2258    Not Collective
2259 
2260    Input Parameters:
2261 +  A - the matrix
2262 -  array - the GPU array in column major order
2263 
2264    Notes:
2265    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2266 
2267    Level: developer
2268 
2269 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
2270 @*/
2271 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2272 {
2273   PetscErrorCode ierr;
2274 
2275   PetscFunctionBegin;
2276   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2277   PetscFunctionReturn(0);
2278 }
2279 
/*@C
   MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters:
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().

   Level: developer

.seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* read/write access: state is increased because the caller may modify the values */
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2308 
/*@C
   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:
   After this call the device copy of the matrix is considered up to date.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  /* the caller may have written through the device pointer; GPU copy is authoritative */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2335 #endif
2336 
2337 /*@C
2338    MatCreateDense - Creates a matrix in dense format.
2339 
2340    Collective
2341 
2342    Input Parameters:
2343 +  comm - MPI communicator
2344 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2345 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2346 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2347 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2348 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2349    to control all matrix memory allocation.
2350 
2351    Output Parameter:
2352 .  A - the matrix
2353 
2354    Notes:
2355    The dense format is fully compatible with standard Fortran 77
2356    storage by columns.
2357 
2358    The data input variable is intended primarily for Fortran programmers
2359    who wish to allocate their own matrix memory space.  Most users should
2360    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
2361 
2362    The user MUST specify either the local or global matrix dimensions
2363    (possibly both).
2364 
2365    Level: intermediate
2366 
2367 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
2368 @*/
2369 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2370 {
2371   PetscErrorCode ierr;
2372   PetscMPIInt    size;
2373 
2374   PetscFunctionBegin;
2375   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2376   PetscValidLogicalCollectiveBool(*A,!!data,6);
2377   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2378   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2379   if (size > 1) {
2380     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
2381     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
2382     if (data) {  /* user provided data array, so no need to assemble */
2383       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
2384       (*A)->assembled = PETSC_TRUE;
2385     }
2386   } else {
2387     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
2388     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
2389   }
2390   PetscFunctionReturn(0);
2391 }
2392 
2393 #if defined(PETSC_HAVE_CUDA)
2394 /*@C
2395    MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.
2396 
2397    Collective
2398 
2399    Input Parameters:
2400 +  comm - MPI communicator
2401 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2402 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2403 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2404 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2405 -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
2406    to control matrix memory allocation.
2407 
2408    Output Parameter:
2409 .  A - the matrix
2410 
2411    Notes:
2412 
2413    Level: intermediate
2414 
2415 .seealso: MatCreate(), MatCreateDense()
2416 @*/
2417 PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2418 {
2419   PetscErrorCode ierr;
2420   PetscMPIInt    size;
2421 
2422   PetscFunctionBegin;
2423   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2424   PetscValidLogicalCollectiveBool(*A,!!data,6);
2425   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2426   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2427   if (size > 1) {
2428     ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
2429     ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
2430     if (data) {  /* user provided data array, so no need to assemble */
2431       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
2432       (*A)->assembled = PETSC_TRUE;
2433     }
2434   } else {
2435     ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
2436     ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
2437   }
2438   PetscFunctionReturn(0);
2439 }
2440 #endif
2441 
2442 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
2443 {
2444   Mat            mat;
2445   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
2446   PetscErrorCode ierr;
2447 
2448   PetscFunctionBegin;
2449   *newmat = 0;
2450   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
2451   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2452   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
2453   a       = (Mat_MPIDense*)mat->data;
2454 
2455   mat->factortype   = A->factortype;
2456   mat->assembled    = PETSC_TRUE;
2457   mat->preallocated = PETSC_TRUE;
2458 
2459   mat->insertmode = NOT_SET_VALUES;
2460   a->donotstash   = oldmat->donotstash;
2461 
2462   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
2463   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
2464 
2465   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2466   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2467   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
2468 
2469   *newmat = mat;
2470   PetscFunctionReturn(0);
2471 }
2472 
/* Load an MPIDENSE matrix from a viewer; binary and (optionally) HDF5 viewers are supported. */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}
2499 
2500 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
2501 {
2502   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2503   Mat            a,b;
2504   PetscBool      flg;
2505   PetscErrorCode ierr;
2506 
2507   PetscFunctionBegin;
2508   a    = matA->A;
2509   b    = matB->A;
2510   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2511   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2512   PetscFunctionReturn(0);
2513 }
2514 
/* Destroy the reuse data (send buffer, recv counts, intermediate product)
   attached to a C = A^T*B product of two MPIDENSE matrices. */
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode        ierr;
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
  ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2526 
/* Destroy the reuse data (double buffers, recv counts/displacements)
   attached to a C = A*B^T product of two MPIDENSE matrices. */
PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode        ierr;
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
  ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
  ierr = PetscFree(abt);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2538 
/* Numeric phase of C = A^T*B for MPIDENSE matrices: each rank forms the
   local product aseq^T*bseq, then the partial products are summed and
   distributed to the owning ranks with MPI_Reduce_scatter. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts;
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  atb = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf = atb->sendbuf;

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);

  /* arrange atbarray into sendbuf: for each destination rank, pack the rows
     it owns, column by column, so Reduce_scatter delivers contiguous chunks */
  ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);

  /* sum all atbarray to local values of C */
  ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2583 
/* Symbolic phase of C = A^T*B for MPIDENSE matrices: set up C's type/sizes
   and allocate the reuse data consumed by the numeric phase. */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool             cisdense;
  PetscInt              i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* A and B must have the same row distribution for the local products to align */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product C */
  ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
  if (!cisdense) {
    ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
  }
  ierr = MatSetUp(C);CHKERRQ(ierr);

  /* create data structure for reuse C: a full cM x cN send buffer plus per-rank
     receive counts for the Reduce_scatter in the numeric phase */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = C->rmap->N;
  ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2623 
/* Symbolic phase of C = A*B^T for MPIDENSE matrices: set up C and allocate
   the communication buffers for the algorithm selected via product->alg
   ("allgatherv" or "cyclic"). */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A and B */
  if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);

  /* alg 0 = allgatherv (default), alg 1 = cyclic send/recv pipeline */
  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  alg  = flg ? 0 : 1;

  /* setup matrix product C */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" -- two buffers sized for the largest row chunk of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" -- local B^T chunk plus a buffer for all of B^T */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data    = abt;
  C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2676 
/* C = A*B^T using a ring pipeline: each rank's (transposed) chunk of B is
   cycled around the communicator; while a chunk is in flight the local GEMM
   for the previously received chunk is performed, overlapping communication
   with computation. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=0, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  = (Mat_MatTransMultDense*)C->product->data;
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  if (blda == bn) {
    /* local block of B is contiguous: send it in place without packing */
    sendbuf = (PetscScalar*)bv;
  } else {
    /* pack the local block of B densely (strip the leading-dimension padding) */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  if (size > 1) {
    /* ring neighbors: send to the previous rank, receive from the next */
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
      ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
    }

    /* local aseq * sendbuf^T, written into the column block of C owned by recvisfrom */
    ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2762 
2763 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2764 {
2765   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2766   Mat_MatTransMultDense *abt;
2767   PetscErrorCode        ierr;
2768   MPI_Comm              comm;
2769   PetscMPIInt           size;
2770   PetscScalar           *cv, *sendbuf, *recvbuf;
2771   const PetscScalar     *av,*bv;
2772   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2773   PetscScalar           _DOne=1.0,_DZero=0.0;
2774   PetscBLASInt          cm, cn, ck, alda, clda;
2775 
2776   PetscFunctionBegin;
2777   MatCheckProduct(C,3);
2778   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2779   abt  = (Mat_MatTransMultDense*)C->product->data;
2780   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2781   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2782   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
2783   ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
2784   ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
2785   ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
2786   ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
2787   ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr);
2788   ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
2789   ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
2790   /* copy transpose of B into buf[0] */
2791   bn      = B->rmap->n;
2792   sendbuf = abt->buf[0];
2793   recvbuf = abt->buf[1];
2794   for (k = 0, j = 0; j < bn; j++) {
2795     for (i = 0; i < cK; i++, k++) {
2796       sendbuf[k] = bv[i * blda + j];
2797     }
2798   }
2799   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2800   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
2801   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2802   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2803   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
2804   if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2805   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
2806   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2807   ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
2808   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2809   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2810   PetscFunctionReturn(0);
2811 }
2812 
2813 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2814 {
2815   Mat_MatTransMultDense *abt;
2816   PetscErrorCode        ierr;
2817 
2818   PetscFunctionBegin;
2819   MatCheckProduct(C,3);
2820   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2821   abt = (Mat_MatTransMultDense*)C->product->data;
2822   switch (abt->alg) {
2823   case 1:
2824     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2825     break;
2826   default:
2827     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2828     break;
2829   }
2830   PetscFunctionReturn(0);
2831 }
2832 
/* Destroy the reuse data (Elemental copies Ae, Be and product Ce)
   attached to a C = A*B product of two MPIDENSE matrices. */
PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode   ierr;
  Mat_MatMultDense *ab = (Mat_MatMultDense*)data;

  PetscFunctionBegin;
  ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
  ierr = PetscFree(ab);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2845 
2846 #if defined(PETSC_HAVE_ELEMENTAL)
/* Numeric phase of C = A*B for MPIDENSE matrices via Elemental: refresh the
   Elemental copies of A and B, multiply there, and convert the result back
   into the existing dense C. */
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode   ierr;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
  ab   = (Mat_MatMultDense*)C->product->data;
  /* reuse the Elemental matrices created in the symbolic phase */
  ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
  ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2862 
2863 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2864 {
2865   PetscErrorCode   ierr;
2866   Mat              Ae,Be,Ce;
2867   Mat_MatMultDense *ab;
2868 
2869   PetscFunctionBegin;
2870   MatCheckProduct(C,4);
2871   if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2872   /* check local size of A and B */
2873   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2874     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2875   }
2876 
2877   /* create elemental matrices Ae and Be */
2878   ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr);
2879   ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2880   ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr);
2881   ierr = MatSetUp(Ae);CHKERRQ(ierr);
2882   ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2883 
2884   ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr);
2885   ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
2886   ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
2887   ierr = MatSetUp(Be);CHKERRQ(ierr);
2888   ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2889 
2890   /* compute symbolic Ce = Ae*Be */
2891   ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr);
2892   ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr);
2893 
2894   /* setup C */
2895   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2896   ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr);
2897   ierr = MatSetUp(C);CHKERRQ(ierr);
2898 
2899   /* create data structure for reuse Cdense */
2900   ierr = PetscNew(&ab);CHKERRQ(ierr);
2901   ab->Ae = Ae;
2902   ab->Be = Be;
2903   ab->Ce = Ce;
2904 
2905   C->product->data    = ab;
2906   C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
2907   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2908   PetscFunctionReturn(0);
2909 }
2910 #endif
2911 /* ----------------------------------------------- */
2912 #if defined(PETSC_HAVE_ELEMENTAL)
2913 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2914 {
2915   PetscFunctionBegin;
2916   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2917   C->ops->productsymbolic = MatProductSymbolic_AB;
2918   PetscFunctionReturn(0);
2919 }
2920 #endif
2921 
2922 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2923 {
2924   Mat_Product *product = C->product;
2925   Mat         A = product->A,B=product->B;
2926 
2927   PetscFunctionBegin;
2928   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2929     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2930   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2931   C->ops->productsymbolic = MatProductSymbolic_AtB;
2932   PetscFunctionReturn(0);
2933 }
2934 
2935 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2936 {
2937   PetscErrorCode ierr;
2938   Mat_Product    *product = C->product;
2939   const char     *algTypes[2] = {"allgatherv","cyclic"};
2940   PetscInt       alg,nalg = 2;
2941   PetscBool      flg = PETSC_FALSE;
2942 
2943   PetscFunctionBegin;
2944   /* Set default algorithm */
2945   alg = 0; /* default is allgatherv */
2946   ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
2947   if (flg) {
2948     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
2949   }
2950 
2951   /* Get runtime option */
2952   if (product->api_user) {
2953     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
2954     ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
2955     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2956   } else {
2957     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
2958     ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
2959     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2960   }
2961   if (flg) {
2962     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
2963   }
2964 
2965   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2966   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2967   PetscFunctionReturn(0);
2968 }
2969 
2970 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2971 {
2972   PetscErrorCode ierr;
2973   Mat_Product    *product = C->product;
2974 
2975   PetscFunctionBegin;
2976   switch (product->type) {
2977 #if defined(PETSC_HAVE_ELEMENTAL)
2978   case MATPRODUCT_AB:
2979     ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
2980     break;
2981 #endif
2982   case MATPRODUCT_AtB:
2983     ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
2984     break;
2985   case MATPRODUCT_ABt:
2986     ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
2987     break;
2988   default:
2989     break;
2990   }
2991   PetscFunctionReturn(0);
2992 }
2993