xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision c5b5d8d5d69aed0b9a5d08ebbb2e85e542e74f68)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11 
12       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
13               matrix that represents the operator. For sequential matrices it returns itself.
14 
15     Input Parameter:
16 .      A - the Seq or MPI dense matrix
17 
18     Output Parameter:
19 .      B - the inner matrix
20 
21     Level: intermediate
22 
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25 {
26   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
27   PetscErrorCode ierr;
28   PetscBool      flg;
29 
30   PetscFunctionBegin;
31   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
32   PetscValidPointer(B,2);
33   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
34   if (flg) *B = mat->A;
35   else *B = A;
36   PetscFunctionReturn(0);
37 }
38 
39 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
40 {
41   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
42   PetscErrorCode ierr;
43   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
44 
45   PetscFunctionBegin;
46   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
47   lrow = row - rstart;
48   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 
52 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
53 {
54   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
55   PetscErrorCode ierr;
56   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
57 
58   PetscFunctionBegin;
59   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
60   lrow = row - rstart;
61   ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
62   PetscFunctionReturn(0);
63 }
64 
/* Returns (in *a) a sequential matrix that aliases the diagonal block of the
   parallel dense matrix A. The block is created lazily on first call, cached
   on A as the composed object "DiagonalBlock", and shares (does not copy) A's
   local storage. The caller does not own a reference: the cache holds it. */
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr);
    if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    /* B points directly into A's local array, starting at column rstart.
       NOTE(review): the offset m*rstart assumes the local leading dimension
       equals m — confirm this holds when a nondefault LDA was set. */
    ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* Cache B on A (compose takes a reference), then drop our own reference;
       the composed object keeps B alive until A is destroyed */
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
98 
/* Inserts/adds an m-by-n logically dense block of values. Rows owned by this
   process go straight into the local sequential matrix; off-process rows are
   stashed for communication during assembly (unless donotstash is set).
   Negative row/column indices are silently skipped. The memory layout of v
   depends on the roworiented flag. */
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      /* locally owned row: set values in the inner sequential matrix */
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v is stored row-by-row: row i begins at v + i*n */
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* v is column-major (m rows per column): element (i,j) is v[i + j*m] */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      /* off-process row: buffer it and mark the matrix unassembled so the
         stash is flushed at the next assembly */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
132 
133 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
134 {
135   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
136   PetscErrorCode ierr;
137   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
138 
139   PetscFunctionBegin;
140   for (i=0; i<m; i++) {
141     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
142     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
143     if (idxm[i] >= rstart && idxm[i] < rend) {
144       row = idxm[i] - rstart;
145       for (j=0; j<n; j++) {
146         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
147         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
148         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
149       }
150     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
156 {
157   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
158   PetscErrorCode ierr;
159 
160   PetscFunctionBegin;
161   ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
162   PetscFunctionReturn(0);
163 }
164 
/* Sets the leading dimension of the local dense storage. If the inner
   sequential matrix does not exist yet it is created here (sized to the
   local rows by ALL global columns), so the LDA can be fixed before
   preallocation. */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->A) {
    if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
    ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
    /* lazily create the local block: A->rmap->n rows by A->cmap->N columns */
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->A);CHKERRQ(ierr);
    ierr = MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
    /* match the device flavor of the parallel matrix */
    ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  }
  ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
185 
186 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
187 {
188   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
189   PetscErrorCode ierr;
190 
191   PetscFunctionBegin;
192   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
193   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
194   PetscFunctionReturn(0);
195 }
196 
197 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
198 {
199   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
200   PetscErrorCode ierr;
201 
202   PetscFunctionBegin;
203   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
204   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
205   PetscFunctionReturn(0);
206 }
207 
208 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
209 {
210   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
211   PetscErrorCode ierr;
212 
213   PetscFunctionBegin;
214   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
215   ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr);
216   PetscFunctionReturn(0);
217 }
218 
219 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
220 {
221   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
222   PetscErrorCode ierr;
223 
224   PetscFunctionBegin;
225   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
226   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
227   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
228   PetscFunctionReturn(0);
229 }
230 
231 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
232 {
233   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
234   PetscErrorCode ierr;
235 
236   PetscFunctionBegin;
237   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
238   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
239   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
240   PetscFunctionReturn(0);
241 }
242 
243 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
244 {
245   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
246   PetscErrorCode ierr;
247 
248   PetscFunctionBegin;
249   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
250   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
251   ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr);
252   PetscFunctionReturn(0);
253 }
254 
255 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
256 {
257   Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
258   PetscErrorCode    ierr;
259   PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
260   const PetscInt    *irow,*icol;
261   const PetscScalar *v;
262   PetscScalar       *bv;
263   Mat               newmat;
264   IS                iscol_local;
265   MPI_Comm          comm_is,comm_mat;
266 
267   PetscFunctionBegin;
268   ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
269   ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
270   if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");
271 
272   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
273   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
274   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
275   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
276   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
277   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
278 
279   /* No parallel redistribution currently supported! Should really check each index set
280      to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
281      original matrix! */
282 
283   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
284   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
285 
286   /* Check submatrix call */
287   if (scall == MAT_REUSE_MATRIX) {
288     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
289     /* Really need to test rows and column sizes! */
290     newmat = *B;
291   } else {
292     /* Create and fill new matrix */
293     ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
294     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
295     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
296     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
297   }
298 
299   /* Now extract the data pointers and do the copy, column at a time */
300   newmatd = (Mat_MPIDense*)newmat->data;
301   ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr);
302   ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr);
303   ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr);
304   for (i=0; i<Ncols; i++) {
305     const PetscScalar *av = v + lda*icol[i];
306     for (j=0; j<nrows; j++) {
307       *bv++ = av[irow[j] - rstart];
308     }
309   }
310   ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr);
311   ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr);
312 
313   /* Assemble the matrices so that the correct flags are set */
314   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
315   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
316 
317   /* Free work space */
318   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
319   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
320   ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
321   *B   = newmat;
322   PetscFunctionReturn(0);
323 }
324 
325 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
326 {
327   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
328   PetscErrorCode ierr;
329 
330   PetscFunctionBegin;
331   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
332   PetscFunctionReturn(0);
333 }
334 
335 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
336 {
337   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
338   PetscErrorCode ierr;
339 
340   PetscFunctionBegin;
341   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
342   PetscFunctionReturn(0);
343 }
344 
345 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
346 {
347   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
348   PetscErrorCode ierr;
349 
350   PetscFunctionBegin;
351   ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr);
352   PetscFunctionReturn(0);
353 }
354 
355 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
356 {
357   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
358   PetscErrorCode ierr;
359   PetscInt       nstash,reallocs;
360 
361   PetscFunctionBegin;
362   if (mdn->donotstash || mat->nooffprocentries) return(0);
363 
364   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
365   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
366   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
367   PetscFunctionReturn(0);
368 }
369 
/* Completes assembly: drains the stash messages started in
   MatAssemblyBegin_MPIDense(), inserting the received off-process values
   locally, then assembles the inner sequential matrix and (on first final
   assembly) sets up the communication pattern for MatMult. */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      /* flg is false once all pending messages have been consumed */
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* build the scatter context used by MatMult on the first final assembly */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
408 
409 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
410 {
411   PetscErrorCode ierr;
412   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
413 
414   PetscFunctionBegin;
415   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
416   PetscFunctionReturn(0);
417 }
418 
/* Zeroes the listed global rows (each rank handles the rows it owns),
   optionally placing diag on the zeroed diagonal entries and fixing the
   right-hand side b so that b_i = diag * x_i for the zeroed rows. */
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr);
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr);
  }
  /* zero the local rows first (with zero diagonal) ... */
  ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  /* ... then insert diag on the diagonal of the zeroed rows via a vector set;
     MatDiagonalSet only touches the diagonal so unzeroed rows keep their values
     only if diag is applied everywhere -- here the whole diagonal is overwritten
     with diag, which matches the zeroed rows; NOTE(review): this sets the
     diagonal of EVERY row to diag, not just the zeroed ones -- confirm intended */
  if (diag != 0.0) {
    Vec d;

    ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
    ierr = VecSet(d,diag);CHKERRQ(ierr);
    ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecDestroy(&d);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
451 
452 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
453 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
454 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
455 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
456 
/* yy = mat * xx. Every rank needs the ENTIRE vector xx to multiply against
   its local (n_local x N) block, so xx is first broadcast into the local
   work vector lvec via the star forest, then the sequential multiply runs. */
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  /* gather the full xx into lvec; Begin/End must bracket the same buffers */
  ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
474 
/* zz = yy + mat * xx. Same communication pattern as MatMult_MPIDense:
   broadcast xx into the local work vector, then the sequential multadd. */
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  /* gather the full xx into lvec; Begin/End must bracket the same buffers */
  ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
492 
/* yy = A^T * xx. Each rank computes its local transpose product into the
   work vector lvec (a full-length partial result), then the partial results
   are summed into the distributed yy with an SF reduction. yy must be zeroed
   first because the reduce adds into it. */
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  ierr = VecSet(yy,0.0);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(yy,&ay);CHKERRQ(ierr);
  /* sum every rank's partial product into the owning rank's portion of yy */
  ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
511 
/* zz = yy + A^T * xx. Same pattern as MatMultTranspose_MPIDense, except the
   accumulator starts as a copy of yy so the reduction adds onto it. */
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(zz,&ay);CHKERRQ(ierr);
  /* sum every rank's partial product into the owning rank's portion of zz */
  ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
530 
531 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
532 {
533   Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
534   PetscErrorCode    ierr;
535   PetscInt          lda,len,i,n,m = A->rmap->n,radd;
536   PetscScalar       *x,zero = 0.0;
537   const PetscScalar *av;
538 
539   PetscFunctionBegin;
540   ierr = VecSet(v,zero);CHKERRQ(ierr);
541   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
542   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
543   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
544   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
545   radd = A->rmap->rstart*m;
546   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
547   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
548   for (i=0; i<len; i++) {
549     x[i] = av[radd + i*lda + i];
550   }
551   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
552   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
553   PetscFunctionReturn(0);
554 }
555 
/* Destroys the parallel dense matrix: releases the stash, the inner
   sequential matrix, the communication objects, and clears every composed
   method so the object can be retyped. Errors out if any column-vector or
   submatrix view is still outstanding. */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (mdn->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (mdn->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  /* detach every method composed by the MPIDENSE implementation */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
#if defined (PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
613 
614 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
615 
616 #include <petscdraw.h>
617 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
618 {
619   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
620   PetscErrorCode    ierr;
621   PetscMPIInt       rank;
622   PetscViewerType   vtype;
623   PetscBool         iascii,isdraw;
624   PetscViewer       sviewer;
625   PetscViewerFormat format;
626 
627   PetscFunctionBegin;
628   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
629   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
630   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
631   if (iascii) {
632     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
633     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
634     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
635       MatInfo info;
636       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
637       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
638       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
639       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
640       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
641       ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
642       PetscFunctionReturn(0);
643     } else if (format == PETSC_VIEWER_ASCII_INFO) {
644       PetscFunctionReturn(0);
645     }
646   } else if (isdraw) {
647     PetscDraw draw;
648     PetscBool isnull;
649 
650     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
651     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
652     if (isnull) PetscFunctionReturn(0);
653   }
654 
655   {
656     /* assemble the entire matrix onto first processor. */
657     Mat         A;
658     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
659     PetscInt    *cols;
660     PetscScalar *vals;
661 
662     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
663     if (!rank) {
664       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
665     } else {
666       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
667     }
668     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
669     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
670     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
671     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
672 
673     /* Copy the matrix ... This isn't the most efficient means,
674        but it's quick for now */
675     A->insertmode = INSERT_VALUES;
676 
677     row = mat->rmap->rstart;
678     m   = mdn->A->rmap->n;
679     for (i=0; i<m; i++) {
680       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
681       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
682       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
683       row++;
684     }
685 
686     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
687     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
688     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
689     if (!rank) {
690       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
691       ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
692     }
693     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
694     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
695     ierr = MatDestroy(&A);CHKERRQ(ierr);
696   }
697   PetscFunctionReturn(0);
698 }
699 
700 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
701 {
702   PetscErrorCode ierr;
703   PetscBool      iascii,isbinary,isdraw,issocket;
704 
705   PetscFunctionBegin;
706   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
707   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
708   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
709   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
710 
711   if (iascii || issocket || isdraw) {
712     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
713   } else if (isbinary) {
714     ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
715   }
716   PetscFunctionReturn(0);
717 }
718 
719 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
720 {
721   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
722   Mat            mdn  = mat->A;
723   PetscErrorCode ierr;
724   PetscLogDouble isend[5],irecv[5];
725 
726   PetscFunctionBegin;
727   info->block_size = 1.0;
728 
729   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
730 
731   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
732   isend[3] = info->memory;  isend[4] = info->mallocs;
733   if (flag == MAT_LOCAL) {
734     info->nz_used      = isend[0];
735     info->nz_allocated = isend[1];
736     info->nz_unneeded  = isend[2];
737     info->memory       = isend[3];
738     info->mallocs      = isend[4];
739   } else if (flag == MAT_GLOBAL_MAX) {
740     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
741 
742     info->nz_used      = irecv[0];
743     info->nz_allocated = irecv[1];
744     info->nz_unneeded  = irecv[2];
745     info->memory       = irecv[3];
746     info->mallocs      = irecv[4];
747   } else if (flag == MAT_GLOBAL_SUM) {
748     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
749 
750     info->nz_used      = irecv[0];
751     info->nz_allocated = irecv[1];
752     info->nz_unneeded  = irecv[2];
753     info->memory       = irecv[3];
754     info->mallocs      = irecv[4];
755   }
756   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
757   info->fill_ratio_needed = 0;
758   info->factor_mallocs    = 0;
759   PetscFunctionReturn(0);
760 }
761 
762 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
763 {
764   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
765   PetscErrorCode ierr;
766 
767   PetscFunctionBegin;
768   switch (op) {
769   case MAT_NEW_NONZERO_LOCATIONS:
770   case MAT_NEW_NONZERO_LOCATION_ERR:
771   case MAT_NEW_NONZERO_ALLOCATION_ERR:
772     MatCheckPreallocated(A,1);
773     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
774     break;
775   case MAT_ROW_ORIENTED:
776     MatCheckPreallocated(A,1);
777     a->roworiented = flg;
778     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
779     break;
780   case MAT_NEW_DIAGONALS:
781   case MAT_KEEP_NONZERO_PATTERN:
782   case MAT_USE_HASH_TABLE:
783   case MAT_SORTED_FULL:
784     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
785     break;
786   case MAT_IGNORE_OFF_PROC_ENTRIES:
787     a->donotstash = flg;
788     break;
789   case MAT_SYMMETRIC:
790   case MAT_STRUCTURALLY_SYMMETRIC:
791   case MAT_HERMITIAN:
792   case MAT_SYMMETRY_ETERNAL:
793   case MAT_IGNORE_LOWER_TRIANGULAR:
794   case MAT_IGNORE_ZERO_ENTRIES:
795     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
796     break;
797   default:
798     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
799   }
800   PetscFunctionReturn(0);
801 }
802 
803 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
804 {
805   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
806   const PetscScalar *l;
807   PetscScalar       x,*v,*vv,*r;
808   PetscErrorCode    ierr;
809   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;
810 
811   PetscFunctionBegin;
812   ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
813   ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
814   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
815   if (ll) {
816     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
817     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2);
818     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
819     for (i=0; i<m; i++) {
820       x = l[i];
821       v = vv + i;
822       for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
823     }
824     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
825     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
826   }
827   if (rr) {
828     const PetscScalar *ar;
829 
830     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
831     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
832     ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
833     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
834     ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
835     ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
836     ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr);
837     for (i=0; i<n; i++) {
838       x = r[i];
839       v = vv + i*lda;
840       for (j=0; j<m; j++) (*v++) *= x;
841     }
842     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
843     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
844   }
845   ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr);
846   PetscFunctionReturn(0);
847 }
848 
/*
   MatNorm_MPIDense - Computes a norm of a parallel dense matrix.

   Collective.

   Supports NORM_FROBENIUS, NORM_1 (max column sum), and NORM_INFINITY
   (max row norm); the two norm is not available in parallel. On one
   process the computation is delegated entirely to the local block.

   NOTE(review): the Frobenius and 1-norm scans walk the local array as if
   it were densely packed column-major (i.e. assume lda == local row
   count) -- TODO confirm for local blocks with a larger leading dimension.
*/
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,j;
  PetscMPIInt       size;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr);
  v    = av;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  if (size == 1) {
    /* uniprocess: the local block is the whole matrix */
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of |a_ij|^2 over the local entries, then reduce and take the root */
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      /* per-column absolute sums, reduced across ranks; the max is the 1-norm */
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = av;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not split across ranks, so a local row-norm + MAX reduction suffices */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
897 
/*
   MatTranspose_MPIDense - Forms the transpose of a parallel dense matrix.

   Collective.

   For MAT_INITIAL_MATRIX and MAT_INPLACE_MATRIX a fresh N-by-M matrix of
   the same type is created; for MAT_REUSE_MATRIX the caller-supplied
   *matout is filled (presumably already of the transposed sizes -- no
   check is made here, verify against callers). Entries are communicated
   by inserting each local column as a row of B via MatSetValues, then
   assembling. For MAT_INPLACE_MATRIX the assembled B replaces A through
   MatHeaderMerge.
*/
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i,lda;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* B gets A's column layout as rows and vice versa */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else B = *matout;

  m    = a->A->rmap->n; n = a->A->cmap->n;
  ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  /* rwork holds the global row indices owned by this rank; each local
     column j of A becomes (part of) global row j of B */
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += lda;                     /* advance to the next local column */
  }
  ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: replace A's contents with B's, destroying the B header */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
935 
936 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
937 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
938 
939 PetscErrorCode MatSetUp_MPIDense(Mat A)
940 {
941   PetscErrorCode ierr;
942 
943   PetscFunctionBegin;
944   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
945   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
946   if (!A->preallocated) {
947     ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
948   }
949   PetscFunctionReturn(0);
950 }
951 
952 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
953 {
954   PetscErrorCode ierr;
955   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
956 
957   PetscFunctionBegin;
958   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
959   PetscFunctionReturn(0);
960 }
961 
962 PetscErrorCode MatConjugate_MPIDense(Mat mat)
963 {
964   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
965   PetscErrorCode ierr;
966 
967   PetscFunctionBegin;
968   ierr = MatConjugate(a->A);CHKERRQ(ierr);
969   PetscFunctionReturn(0);
970 }
971 
972 PetscErrorCode MatRealPart_MPIDense(Mat A)
973 {
974   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
975   PetscErrorCode ierr;
976 
977   PetscFunctionBegin;
978   ierr = MatRealPart(a->A);CHKERRQ(ierr);
979   PetscFunctionReturn(0);
980 }
981 
982 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
983 {
984   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
985   PetscErrorCode ierr;
986 
987   PetscFunctionBegin;
988   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
989   PetscFunctionReturn(0);
990 }
991 
992 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
993 {
994   PetscErrorCode ierr;
995   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
996 
997   PetscFunctionBegin;
998   if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
999   if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
1000   ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
1001   PetscFunctionReturn(0);
1002 }
1003 
1004 PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1005 
1006 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1007 {
1008   PetscErrorCode ierr;
1009   PetscInt       i,n;
1010   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1011   PetscReal      *work;
1012 
1013   PetscFunctionBegin;
1014   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1015   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1016   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1017   if (type == NORM_2) {
1018     for (i=0; i<n; i++) work[i] *= work[i];
1019   }
1020   if (type == NORM_INFINITY) {
1021     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1022   } else {
1023     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1024   }
1025   ierr = PetscFree(work);CHKERRQ(ierr);
1026   if (type == NORM_2) {
1027     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1028   }
1029   PetscFunctionReturn(0);
1030 }
1031 
1032 #if defined(PETSC_HAVE_CUDA)
1033 static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1034 {
1035   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1036   PetscErrorCode ierr;
1037   PetscInt       lda;
1038 
1039   PetscFunctionBegin;
1040   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1041   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1042   if (!a->cvec) {
1043     ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1044     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
1045   }
1046   a->vecinuse = col + 1;
1047   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1048   ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1049   ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1050   *v   = a->cvec;
1051   PetscFunctionReturn(0);
1052 }
1053 
1054 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1055 {
1056   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1057   PetscErrorCode ierr;
1058 
1059   PetscFunctionBegin;
1060   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1061   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1062   a->vecinuse = 0;
1063   ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1064   ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
1065   *v   = NULL;
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1070 {
1071   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1072   PetscInt       lda;
1073   PetscErrorCode ierr;
1074 
1075   PetscFunctionBegin;
1076   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1077   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1078   if (!a->cvec) {
1079     ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1080     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
1081   }
1082   a->vecinuse = col + 1;
1083   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1084   ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1085   ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1086   ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
1087   *v   = a->cvec;
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1092 {
1093   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1094   PetscErrorCode ierr;
1095 
1096   PetscFunctionBegin;
1097   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1098   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1099   a->vecinuse = 0;
1100   ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1101   ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
1102   ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
1103   *v   = NULL;
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1108 {
1109   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1110   PetscErrorCode ierr;
1111   PetscInt       lda;
1112 
1113   PetscFunctionBegin;
1114   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1115   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1116   if (!a->cvec) {
1117     ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1118     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
1119   }
1120   a->vecinuse = col + 1;
1121   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1122   ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1123   ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1124   *v   = a->cvec;
1125   PetscFunctionReturn(0);
1126 }
1127 
1128 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1129 {
1130   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1131   PetscErrorCode ierr;
1132 
1133   PetscFunctionBegin;
1134   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1135   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1136   a->vecinuse = 0;
1137   ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1138   ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
1139   *v   = NULL;
1140   PetscFunctionReturn(0);
1141 }
1142 
1143 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1144 {
1145   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1146   PetscErrorCode ierr;
1147 
1148   PetscFunctionBegin;
1149   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1150   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1151   ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
1152   PetscFunctionReturn(0);
1153 }
1154 
1155 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1156 {
1157   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1158   PetscErrorCode ierr;
1159 
1160   PetscFunctionBegin;
1161   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1162   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1163   ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
1164   PetscFunctionReturn(0);
1165 }
1166 
1167 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1168 {
1169   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1170   PetscErrorCode ierr;
1171 
1172   PetscFunctionBegin;
1173   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1174   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1175   ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr);
1176   PetscFunctionReturn(0);
1177 }
1178 
1179 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1180 {
1181   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1182   PetscErrorCode ierr;
1183 
1184   PetscFunctionBegin;
1185   ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
1186   PetscFunctionReturn(0);
1187 }
1188 
1189 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1190 {
1191   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1192   PetscErrorCode ierr;
1193 
1194   PetscFunctionBegin;
1195   ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
1196   PetscFunctionReturn(0);
1197 }
1198 
1199 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1200 {
1201   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1202   PetscErrorCode ierr;
1203 
1204   PetscFunctionBegin;
1205   ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
1206   PetscFunctionReturn(0);
1207 }
1208 
1209 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1210 {
1211   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1212   PetscErrorCode ierr;
1213 
1214   PetscFunctionBegin;
1215   ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
1216   PetscFunctionReturn(0);
1217 }
1218 
1219 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1220 {
1221   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1222   PetscErrorCode ierr;
1223 
1224   PetscFunctionBegin;
1225   ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
1226   PetscFunctionReturn(0);
1227 }
1228 
1229 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1230 {
1231   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1232   PetscErrorCode ierr;
1233 
1234   PetscFunctionBegin;
1235   ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
1236   PetscFunctionReturn(0);
1237 }
1238 
1239 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1240 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1241 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
1242 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1243 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1244 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
1245 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);
1246 
/*
   MatBindToCPU_MPIDenseCUDA - Pins the matrix to host (bind=PETSC_TRUE) or
   allows device execution (bind=PETSC_FALSE).

   Forwards the binding to the local block, then swaps the composed
   column-vector get/restore implementations between the CUDA and plain
   MPIDense variants to match. Cached helper objects (cvec/cmat) of the
   wrong type for device use are destroyed so they get recreated lazily.
*/
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* cannot rebind while a column vector or submatrix is checked out */
  if (d->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (d->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* drop cached helpers that are not CUDA-typed so device-capable ones
       are created on next use */
    ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
    }
    ierr = PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = MatDestroy(&d->cmat);CHKERRQ(ierr);
    }
    /* install the CUDA column-vector accessors */
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
  } else {
    /* install the host (plain MPIDense) column-vector accessors */
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  }
  if (d->cmat) {
    /* keep the cached column-submatrix consistent with the new binding */
    ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1289 
/*
   MatMPIDenseCUDASetPreallocation - Allocates (or adopts) the device storage
   of a MATMPIDENSECUDA matrix.

   Collective.

   Input Parameters:
+  A      - the matrix; silently ignored unless its type is MATMPIDENSECUDA
-  d_data - optional device array to use as storage; pass NULL to have
            PETSc allocate it

   The local block is created on first call (each rank stores its row slice
   with all global columns) and typed as MATSEQDENSECUDA before the
   sequential preallocation is applied.
*/
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  if (!iscuda) PetscFunctionReturn(0);
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!d->A) {
    /* lazily create the sequential block: local rows by ALL global columns */
    ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
    ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
  }
  ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
  ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1312 #endif
1313 
1314 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1315 {
1316   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1317   PetscErrorCode ierr;
1318 
1319   PetscFunctionBegin;
1320   ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
1321   PetscFunctionReturn(0);
1322 }
1323 
/* A dense matrix stores every entry explicitly, so no diagonal entry can be
   missing; *missing is always PETSC_FALSE and the index output d is left
   untouched. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1330 
1331 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1332 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1333 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1334 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1335 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
1336 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);
1337 
1338 /* -------------------------------------------------------------------*/
1339 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1340                                         MatGetRow_MPIDense,
1341                                         MatRestoreRow_MPIDense,
1342                                         MatMult_MPIDense,
1343                                 /*  4*/ MatMultAdd_MPIDense,
1344                                         MatMultTranspose_MPIDense,
1345                                         MatMultTransposeAdd_MPIDense,
1346                                         0,
1347                                         0,
1348                                         0,
1349                                 /* 10*/ 0,
1350                                         0,
1351                                         0,
1352                                         0,
1353                                         MatTranspose_MPIDense,
1354                                 /* 15*/ MatGetInfo_MPIDense,
1355                                         MatEqual_MPIDense,
1356                                         MatGetDiagonal_MPIDense,
1357                                         MatDiagonalScale_MPIDense,
1358                                         MatNorm_MPIDense,
1359                                 /* 20*/ MatAssemblyBegin_MPIDense,
1360                                         MatAssemblyEnd_MPIDense,
1361                                         MatSetOption_MPIDense,
1362                                         MatZeroEntries_MPIDense,
1363                                 /* 24*/ MatZeroRows_MPIDense,
1364                                         0,
1365                                         0,
1366                                         0,
1367                                         0,
1368                                 /* 29*/ MatSetUp_MPIDense,
1369                                         0,
1370                                         0,
1371                                         MatGetDiagonalBlock_MPIDense,
1372                                         0,
1373                                 /* 34*/ MatDuplicate_MPIDense,
1374                                         0,
1375                                         0,
1376                                         0,
1377                                         0,
1378                                 /* 39*/ MatAXPY_MPIDense,
1379                                         MatCreateSubMatrices_MPIDense,
1380                                         0,
1381                                         MatGetValues_MPIDense,
1382                                         0,
1383                                 /* 44*/ 0,
1384                                         MatScale_MPIDense,
1385                                         MatShift_Basic,
1386                                         0,
1387                                         0,
1388                                 /* 49*/ MatSetRandom_MPIDense,
1389                                         0,
1390                                         0,
1391                                         0,
1392                                         0,
1393                                 /* 54*/ 0,
1394                                         0,
1395                                         0,
1396                                         0,
1397                                         0,
1398                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1399                                         MatDestroy_MPIDense,
1400                                         MatView_MPIDense,
1401                                         0,
1402                                         0,
1403                                 /* 64*/ 0,
1404                                         0,
1405                                         0,
1406                                         0,
1407                                         0,
1408                                 /* 69*/ 0,
1409                                         0,
1410                                         0,
1411                                         0,
1412                                         0,
1413                                 /* 74*/ 0,
1414                                         0,
1415                                         0,
1416                                         0,
1417                                         0,
1418                                 /* 79*/ 0,
1419                                         0,
1420                                         0,
1421                                         0,
1422                                 /* 83*/ MatLoad_MPIDense,
1423                                         0,
1424                                         0,
1425                                         0,
1426                                         0,
1427                                         0,
1428                                 /* 89*/ 0,
1429                                         0,
1430                                         0,
1431                                         0,
1432                                         0,
1433                                 /* 94*/ 0,
1434                                         0,
1435                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1436                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1437                                         0,
1438                                 /* 99*/ MatProductSetFromOptions_MPIDense,
1439                                         0,
1440                                         0,
1441                                         MatConjugate_MPIDense,
1442                                         0,
1443                                 /*104*/ 0,
1444                                         MatRealPart_MPIDense,
1445                                         MatImaginaryPart_MPIDense,
1446                                         0,
1447                                         0,
1448                                 /*109*/ 0,
1449                                         0,
1450                                         0,
1451                                         MatGetColumnVector_MPIDense,
1452                                         MatMissingDiagonal_MPIDense,
1453                                 /*114*/ 0,
1454                                         0,
1455                                         0,
1456                                         0,
1457                                         0,
1458                                 /*119*/ 0,
1459                                         0,
1460                                         0,
1461                                         0,
1462                                         0,
1463                                 /*124*/ 0,
1464                                         MatGetColumnNorms_MPIDense,
1465                                         0,
1466                                         0,
1467                                         0,
1468                                 /*129*/ 0,
1469                                         0,
1470                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1471                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1472                                         0,
1473                                 /*134*/ 0,
1474                                         0,
1475                                         0,
1476                                         0,
1477                                         0,
1478                                 /*139*/ 0,
1479                                         0,
1480                                         0,
1481                                         0,
1482                                         0,
1483                                         MatCreateMPIMatConcatenateSeqMat_MPIDense,
1484                                 /*145*/ 0,
1485                                         0,
1486                                         0
1487 };
1488 
/*
  MatMPIDenseSetPreallocation_MPIDense - Implementation of MatMPIDenseSetPreallocation() for MATMPIDENSE.

  Sets up the row/column layouts and lazily creates the inner sequential dense matrix a->A
  holding this process's rows, then preallocates it with the optional user-provided array
  (data may be NULL, in which case PETSc allocates the storage).
*/
PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  if (!a->A) {
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
    /* the inner matrix stores all local rows and ALL global columns */
    ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  }
  /* match the inner sequential type (host or CUDA) to the outer parallel type */
  ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  mat->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1510 
1511 #if defined(PETSC_HAVE_ELEMENTAL)
1512 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1513 {
1514   Mat            mat_elemental;
1515   PetscErrorCode ierr;
1516   PetscScalar    *v;
1517   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1518 
1519   PetscFunctionBegin;
1520   if (reuse == MAT_REUSE_MATRIX) {
1521     mat_elemental = *newmat;
1522     ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
1523   } else {
1524     ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
1525     ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1526     ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
1527     ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
1528     ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1529   }
1530 
1531   ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
1532   for (i=0; i<N; i++) cols[i] = i;
1533   for (i=0; i<m; i++) rows[i] = rstart + i;
1534 
1535   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1536   ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
1537   ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
1538   ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1539   ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1540   ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
1541   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1542 
1543   if (reuse == MAT_INPLACE_MATRIX) {
1544     ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
1545   } else {
1546     *newmat = mat_elemental;
1547   }
1548   PetscFunctionReturn(0);
1549 }
1550 #endif
1551 
1552 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1553 {
1554   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1555   PetscErrorCode ierr;
1556 
1557   PetscFunctionBegin;
1558   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1559   PetscFunctionReturn(0);
1560 }
1561 
1562 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1563 {
1564   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1565   PetscErrorCode ierr;
1566 
1567   PetscFunctionBegin;
1568   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1569   PetscFunctionReturn(0);
1570 }
1571 
1572 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1573 {
1574   PetscErrorCode ierr;
1575   Mat_MPIDense   *mat;
1576   PetscInt       m,nloc,N;
1577 
1578   PetscFunctionBegin;
1579   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
1580   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
1581   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1582     PetscInt sum;
1583 
1584     if (n == PETSC_DECIDE) {
1585       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1586     }
1587     /* Check sum(n) = N */
1588     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1589     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1590 
1591     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
1592   }
1593 
1594   /* numeric phase */
1595   mat = (Mat_MPIDense*)(*outmat)->data;
1596   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1597   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1598   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1599   PetscFunctionReturn(0);
1600 }
1601 
1602 #if defined(PETSC_HAVE_CUDA)
/*
  MatConvert_MPIDenseCUDA_MPIDense - Converts a MATMPIDENSECUDA matrix back to host MATMPIDENSE.

  Rebinds the matrix to the CPU, resets the default vector type, removes all
  CUDA-specific composed methods, and converts the inner sequential matrix to
  MATSEQDENSE. The offload mask ends up PETSC_OFFLOAD_CPU.
*/
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* for MAT_INPLACE_MATRIX, *newmat already aliases M and is converted in place */

  B    = *newmat;
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
  /* unregister every CUDA-only composed method by composing NULL */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* convert the inner sequential matrix and rebuild the scatter used by MatMult */
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}
1644 
/*
  MatConvert_MPIDense_MPIDenseCUDA - Converts a host MATMPIDENSE matrix to MATMPIDENSECUDA.

  Sets the default vector type to CUDA, registers the CUDA-specific composed
  methods, converts the inner sequential matrix to MATSEQDENSECUDA, and binds
  operations to the GPU.
*/
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* for MAT_INPLACE_MATRIX, *newmat already aliases M and is converted in place */

  B    = *newmat;
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  /* register the CUDA-specific composed methods */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* convert the inner sequential matrix and rebuild the scatter used by MatMult */
    ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
1689 #endif
1690 
/*
  MatDenseGetColumnVec_MPIDense - Implementation of MatDenseGetColumnVec() for MATMPIDENSE.

  Exposes column col of the matrix as an MPI vector that shares the matrix storage
  (no copy). The work vector a->cvec is created lazily and reused; only one column
  vector may be outstanding at a time (tracked via a->vecinuse).
*/
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* create the vector with no array; the matrix storage is placed below */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* offset by one so column 0 still reads as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  /* column col starts at offset col*lda in the column-major storage */
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1710 
1711 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1712 {
1713   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1714   PetscErrorCode ierr;
1715 
1716   PetscFunctionBegin;
1717   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1718   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1719   a->vecinuse = 0;
1720   ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1721   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1722   *v   = NULL;
1723   PetscFunctionReturn(0);
1724 }
1725 
/*
  MatDenseGetColumnVecRead_MPIDense - Implementation of MatDenseGetColumnVecRead() for MATMPIDENSE.

  Same as MatDenseGetColumnVec_MPIDense() but for read-only access: the inner array
  is taken with read semantics and the returned vector is lock-pushed so callers
  cannot write through it.
*/
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* create the vector with no array; the matrix storage is placed below */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* offset by one so column 0 still reads as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  /* column col starts at offset col*lda in the column-major storage */
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1746 
1747 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1748 {
1749   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1750   PetscErrorCode ierr;
1751 
1752   PetscFunctionBegin;
1753   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1754   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1755   a->vecinuse = 0;
1756   ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1757   ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
1758   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1759   *v   = NULL;
1760   PetscFunctionReturn(0);
1761 }
1762 
/*
  MatDenseGetColumnVecWrite_MPIDense - Implementation of MatDenseGetColumnVecWrite() for MATMPIDENSE.

  Same as MatDenseGetColumnVec_MPIDense() but with write-only semantics for the
  inner array access: existing values need not be valid on return.
*/
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* create the vector with no array; the matrix storage is placed below */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* offset by one so column 0 still reads as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  /* column col starts at offset col*lda in the column-major storage */
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1782 
1783 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1784 {
1785   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1786   PetscErrorCode ierr;
1787 
1788   PetscFunctionBegin;
1789   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1790   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1791   a->vecinuse = 0;
1792   ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1793   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1794   *v   = NULL;
1795   PetscFunctionReturn(0);
1796 }
1797 
/*
  MatDenseGetSubMatrix_MPIDense - Implementation of MatDenseGetSubMatrix() for MATMPIDENSE.

  Exposes columns [cbegin, cend) as a matrix sharing the original storage (no copy).
  The wrapper matrix a->cmat is created lazily and reused; its column layout is
  rebuilt when the requested width changes. Only one submatrix may be outstanding
  at a time (tracked via a->matinuse).
*/
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscBool      setup = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (a->vecinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cmat) {
    /* first use: create the wrapper matrix sharing A's row layout */
    setup = PETSC_TRUE;
    ierr = MatCreate(comm,&a->cmat);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr);
    ierr = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  } else if (cend-cbegin != a->cmat->cmap->N) {
    /* requested width differs from the cached wrapper: rebuild the column layout */
    setup = PETSC_TRUE;
    ierr = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  }
  c = (Mat_MPIDense*)a->cmat->data;
  if (c->A) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr);
  if (setup) { /* do we really need this? */
    ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr);
  }
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled = PETSC_TRUE;
  a->matinuse = cbegin + 1; /* offset by one so cbegin 0 still reads as "in use" */
  *v = a->cmat;
  PetscFunctionReturn(0);
}
1837 
/*
  MatDenseRestoreSubMatrix_MPIDense - Implementation of MatDenseRestoreSubMatrix() for MATMPIDENSE.

  Returns the submatrix obtained with MatDenseGetSubMatrix_MPIDense(): releases the
  inner sequential submatrix held by the cached wrapper a->cmat and clears the
  in-use flag so a new submatrix (or column vector) can be requested.
*/
PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
  if (!a->cmat) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
  if (*v != a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c    = (Mat_MPIDense*)a->cmat->data;
  ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1854 
/*
  MatCreate_MPIDense - Type constructor for MATMPIDENSE.

  Allocates the type-private data, installs the MATMPIDENSE operation table, and
  registers the composed methods implementing the MatDense API for this type.
  Does NOT allocate matrix storage; that happens in MatMPIDenseSetPreallocation().
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  /* register the MatDense API implementations for this type */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr);
  /* optional conversions, available only when the corresponding package is built in */
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#endif

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1918 
1919 /*MC
1920    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1921 
1922    Options Database Keys:
1923 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1924 
1925   Level: beginner
1926 
.seealso: MATMPIDENSE, MATSEQDENSECUDA, MATDENSECUDA
1928 
1929 M*/
1930 #if defined(PETSC_HAVE_CUDA)
/*
  MatCreate_MPIDenseCUDA - Type constructor for MATMPIDENSECUDA: builds a MATMPIDENSE
  matrix and converts it in place to the CUDA type.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1940 #endif
1941 
1942 /*MC
1943    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1944 
1945    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1946    and MATMPIDENSE otherwise.
1947 
1948    Options Database Keys:
1949 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1950 
1951   Level: beginner
1952 
1953 
1954 .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
1955 M*/
1956 
1957 /*MC
1958    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
1959 
1960    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
1961    and MATMPIDENSECUDA otherwise.
1962 
1963    Options Database Keys:
1964 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
1965 
1966   Level: beginner
1967 
1968 .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
1969 M*/
1970 
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  /* "try": silently a no-op for matrix types that do not implement it */
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2002 
/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* "use": errors if the matrix type does not implement this operation */
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the placed host array is now authoritative */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
2036 
/*@
   MatDenseResetArray - Resets the matrix array to that which it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameter:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode  MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* "use": errors if the matrix type does not implement this operation */
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2063 
/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), VecReplaceArray()
@*/
PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* "use": errors if the matrix type does not implement this operation */
  ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the replaced host array is now authoritative */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
2096 
2097 #if defined(PETSC_HAVE_CUDA)
2098 /*@C
2099    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
2100    array provided by the user. This is useful to avoid copying an array
2101    into a matrix
2102 
2103    Not Collective
2104 
2105    Input Parameters:
2106 +  mat - the matrix
2107 -  array - the array in column major order
2108 
2109    Notes:
2110    You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
2111    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
2112 
2113    Level: developer
2114 
2115 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
2116 @*/
2117 PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
2118 {
2119   PetscErrorCode ierr;
2120 
2121   PetscFunctionBegin;
2122   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2123   ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2124   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2125   mat->offloadmask = PETSC_OFFLOAD_GPU;
2126   PetscFunctionReturn(0);
2127 }
2128 
2129 /*@C
2130    MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray()
2131 
2132    Not Collective
2133 
2134    Input Parameters:
2135 .  mat - the matrix
2136 
2137    Notes:
2138    You can only call this after a call to MatDenseCUDAPlaceArray()
2139 
2140    Level: developer
2141 
2142 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()
2143 
2144 @*/
2145 PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
2146 {
2147   PetscErrorCode ierr;
2148 
2149   PetscFunctionBegin;
2150   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2151   ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
2152   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2153   PetscFunctionReturn(0);
2154 }
2155 
2156 /*@C
2157    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
2158    array provided by the user. This is useful to avoid copying an array
2159    into a matrix
2160 
2161    Not Collective
2162 
2163    Input Parameters:
2164 +  mat - the matrix
2165 -  array - the array in column major order
2166 
2167    Notes:
2168    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2169    The memory passed in CANNOT be freed by the user. It will be freed
2170    when the matrix is destroyed. The array should respect the matrix leading dimension.
2171 
2172    Level: developer
2173 
2174 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
2175 @*/
2176 PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
2177 {
2178   PetscErrorCode ierr;
2179 
2180   PetscFunctionBegin;
2181   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2182   ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2183   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2184   mat->offloadmask = PETSC_OFFLOAD_GPU;
2185   PetscFunctionReturn(0);
2186 }
2187 
2188 /*@C
2189    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.
2190 
2191    Not Collective
2192 
2193    Input Parameters:
2194 .  A - the matrix
2195 
2196    Output Parameters
2197 .  array - the GPU array in column major order
2198 
2199    Notes:
2200    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
2201 
2202    Level: developer
2203 
2204 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
2205 @*/
2206 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
2207 {
2208   PetscErrorCode ierr;
2209 
2210   PetscFunctionBegin;
2211   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2212   ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2213   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2214   PetscFunctionReturn(0);
2215 }
2216 
2217 /*@C
2218    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().
2219 
2220    Not Collective
2221 
2222    Input Parameters:
2223 +  A - the matrix
2224 -  array - the GPU array in column major order
2225 
2226    Notes:
2227 
2228    Level: developer
2229 
2230 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2231 @*/
2232 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
2233 {
2234   PetscErrorCode ierr;
2235 
2236   PetscFunctionBegin;
2237   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2238   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2239   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2240   A->offloadmask = PETSC_OFFLOAD_GPU;
2241   PetscFunctionReturn(0);
2242 }
2243 
2244 /*@C
2245    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.
2246 
2247    Not Collective
2248 
2249    Input Parameters:
2250 .  A - the matrix
2251 
2252    Output Parameters
2253 .  array - the GPU array in column major order
2254 
2255    Notes:
2256    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2257 
2258    Level: developer
2259 
2260 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2261 @*/
2262 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
2263 {
2264   PetscErrorCode ierr;
2265 
2266   PetscFunctionBegin;
2267   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2268   ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2269   PetscFunctionReturn(0);
2270 }
2271 
2272 /*@C
2273    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2274 
2275    Not Collective
2276 
2277    Input Parameters:
2278 +  A - the matrix
2279 -  array - the GPU array in column major order
2280 
2281    Notes:
2282    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2283 
2284    Level: developer
2285 
2286 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
2287 @*/
2288 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2289 {
2290   PetscErrorCode ierr;
2291 
2292   PetscFunctionBegin;
2293   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2294   PetscFunctionReturn(0);
2295 }
2296 
2297 /*@C
2298    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.
2299 
2300    Not Collective
2301 
2302    Input Parameters:
2303 .  A - the matrix
2304 
2305    Output Parameters
2306 .  array - the GPU array in column major order
2307 
2308    Notes:
2309    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().
2310 
2311    Level: developer
2312 
2313 .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2314 @*/
2315 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
2316 {
2317   PetscErrorCode ierr;
2318 
2319   PetscFunctionBegin;
2320   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2321   ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2322   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2323   PetscFunctionReturn(0);
2324 }
2325 
2326 /*@C
2327    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().
2328 
2329    Not Collective
2330 
2331    Input Parameters:
2332 +  A - the matrix
2333 -  array - the GPU array in column major order
2334 
2335    Notes:
2336 
2337    Level: developer
2338 
2339 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2340 @*/
2341 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
2342 {
2343   PetscErrorCode ierr;
2344 
2345   PetscFunctionBegin;
2346   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2347   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2348   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2349   A->offloadmask = PETSC_OFFLOAD_GPU;
2350   PetscFunctionReturn(0);
2351 }
2352 #endif
2353 
2354 /*@C
2355    MatCreateDense - Creates a matrix in dense format.
2356 
2357    Collective
2358 
2359    Input Parameters:
2360 +  comm - MPI communicator
2361 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2362 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2363 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2364 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2365 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2366    to control all matrix memory allocation.
2367 
2368    Output Parameter:
2369 .  A - the matrix
2370 
2371    Notes:
2372    The dense format is fully compatible with standard Fortran 77
2373    storage by columns.
2374 
2375    The data input variable is intended primarily for Fortran programmers
2376    who wish to allocate their own matrix memory space.  Most users should
2377    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
2378 
2379    The user MUST specify either the local or global matrix dimensions
2380    (possibly both).
2381 
2382    Level: intermediate
2383 
2384 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
2385 @*/
2386 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2387 {
2388   PetscErrorCode ierr;
2389   PetscMPIInt    size;
2390 
2391   PetscFunctionBegin;
2392   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2393   PetscValidLogicalCollectiveBool(*A,!!data,6);
2394   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2395   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2396   if (size > 1) {
2397     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
2398     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
2399     if (data) {  /* user provided data array, so no need to assemble */
2400       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
2401       (*A)->assembled = PETSC_TRUE;
2402     }
2403   } else {
2404     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
2405     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
2406   }
2407   PetscFunctionReturn(0);
2408 }
2409 
2410 #if defined(PETSC_HAVE_CUDA)
2411 /*@C
2412    MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.
2413 
2414    Collective
2415 
2416    Input Parameters:
2417 +  comm - MPI communicator
2418 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2419 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2420 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2421 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2422 -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
2423    to control matrix memory allocation.
2424 
2425    Output Parameter:
2426 .  A - the matrix
2427 
2428    Notes:
2429 
2430    Level: intermediate
2431 
2432 .seealso: MatCreate(), MatCreateDense()
2433 @*/
2434 PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2435 {
2436   PetscErrorCode ierr;
2437   PetscMPIInt    size;
2438 
2439   PetscFunctionBegin;
2440   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2441   PetscValidLogicalCollectiveBool(*A,!!data,6);
2442   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2443   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2444   if (size > 1) {
2445     ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
2446     ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
2447     if (data) {  /* user provided data array, so no need to assemble */
2448       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
2449       (*A)->assembled = PETSC_TRUE;
2450     }
2451   } else {
2452     ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
2453     ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
2454   }
2455   PetscFunctionReturn(0);
2456 }
2457 #endif
2458 
2459 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
2460 {
2461   Mat            mat;
2462   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
2463   PetscErrorCode ierr;
2464 
2465   PetscFunctionBegin;
2466   *newmat = 0;
2467   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
2468   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2469   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
2470   a       = (Mat_MPIDense*)mat->data;
2471 
2472   mat->factortype   = A->factortype;
2473   mat->assembled    = PETSC_TRUE;
2474   mat->preallocated = PETSC_TRUE;
2475 
2476   mat->insertmode = NOT_SET_VALUES;
2477   a->donotstash   = oldmat->donotstash;
2478 
2479   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
2480   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
2481 
2482   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2483   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2484   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
2485 
2486   *newmat = mat;
2487   PetscFunctionReturn(0);
2488 }
2489 
2490 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
2491 {
2492   PetscErrorCode ierr;
2493   PetscBool      isbinary;
2494 #if defined(PETSC_HAVE_HDF5)
2495   PetscBool      ishdf5;
2496 #endif
2497 
2498   PetscFunctionBegin;
2499   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2500   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2501   /* force binary viewer to load .info file if it has not yet done so */
2502   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2503   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2504 #if defined(PETSC_HAVE_HDF5)
2505   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2506 #endif
2507   if (isbinary) {
2508     ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
2509 #if defined(PETSC_HAVE_HDF5)
2510   } else if (ishdf5) {
2511     ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
2512 #endif
2513   } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2514   PetscFunctionReturn(0);
2515 }
2516 
2517 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
2518 {
2519   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2520   Mat            a,b;
2521   PetscBool      flg;
2522   PetscErrorCode ierr;
2523 
2524   PetscFunctionBegin;
2525   a    = matA->A;
2526   b    = matB->A;
2527   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2528   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2529   PetscFunctionReturn(0);
2530 }
2531 
2532 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2533 {
2534   PetscErrorCode        ierr;
2535   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2536 
2537   PetscFunctionBegin;
2538   ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
2539   ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
2540   ierr = PetscFree(atb);CHKERRQ(ierr);
2541   PetscFunctionReturn(0);
2542 }
2543 
2544 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2545 {
2546   PetscErrorCode        ierr;
2547   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2548 
2549   PetscFunctionBegin;
2550   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
2551   ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
2552   ierr = PetscFree(abt);CHKERRQ(ierr);
2553   PetscFunctionReturn(0);
2554 }
2555 
/* Numeric phase of C = A^T * B for MPIDENSE matrices.
   Each rank computes the full (global-size) product of its local blocks,
   aseq^T * bseq, then the per-rank partial products are summed and
   redistributed by row ownership with MPI_Reduce_scatter.
   The reuse structure (sendbuf, recvcounts, atb) was allocated in the
   matching symbolic routine. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts;
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;  /* cM x cN is the GLOBAL size of C */
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  atb = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf = atb->sendbuf;

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);

  /* arrange atbarray into sendbuf */
  /* pack column-major blocks grouped by destination rank: for each rank, all cN
     columns of its owned row range, so Reduce_scatter can deliver contiguous chunks */
  ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);

  /* sum all atbarray to local values of C */
  ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2600 
/* Symbolic phase of C = A^T * B for MPIDENSE matrices.
   Sets the sizes/type of C and allocates the reuse structure
   (send buffer of the full global product size, and the Reduce_scatter
   receive counts matching C's row ownership ranges). */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool             cisdense;
  PetscInt              i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* A^T*B requires A and B to have identical row distributions */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product C */
  ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
  if (!cisdense) {
    ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
  }
  ierr = MatSetUp(C);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = C->rmap->N;
  /* sendbuf holds the full cM x cN partial product; size_t cast avoids PetscInt overflow */
  ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  /* each rank receives cN columns of its owned row range from MPI_Reduce_scatter */
  for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2640 
/* Symbolic phase of C = A * B^T for MPIDENSE matrices.
   Selects one of two numeric algorithms ("allgatherv" or "cyclic") from the
   product's alg string and allocates the communication buffers each needs. */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  PetscMPIInt           tag;
  PetscInt              alg;                  /* 0 = allgatherv, 1 = cyclic */
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A and B */
  /* A*B^T requires A and B to have the same (local) column layout */
  if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);

  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  alg  = flg ? 0 : 1;

  /* setup matrix product C */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  /* a dedicated tag keeps the cyclic ring messages from colliding with other traffic */
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    /* two ping-pong buffers, each large enough for any rank's row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" */
    /* buf[0] holds the local transpose of B, buf[1] the fully gathered transpose */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    /* displacements track B's row ownership scaled by the shared column count */
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data    = abt;
  C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2693 
/* Numeric phase of C = A * B^T, "cyclic" algorithm.
   B's local row blocks circulate around a ring of ranks; at each of the
   `size` steps every rank multiplies its local A block against the row block
   currently held (BLAS gemm with the "T" flag supplying the transpose) and
   writes the result into the matching column slice of its local C block,
   while the next block is already in flight (Isend/Irecv overlap). */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=0, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  = (Mat_MatTransMultDense*)C->product->data;
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
  /* fetch leading dimensions (may exceed the local row counts) for the gemm calls */
  ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  if (blda == bn) {
    /* B's local block is contiguous: send it directly without packing */
    sendbuf = (PetscScalar*)bv;
  } else {
    /* strided storage: compact B's local block column by column into buf[0] */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  if (size > 1) {
    /* ring neighbors: send "left", receive from "right" */
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;  /* step 0 multiplies against our own block */
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      /* alternate between the two buffers so the one being received into is never the one being read */
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
      ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
    }

    /* local aseq * sendbuf^T */
    /* the result lands in C's columns owned (as rows of B) by recvisfrom */
    ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2779 
2780 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2781 {
2782   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2783   Mat_MatTransMultDense *abt;
2784   PetscErrorCode        ierr;
2785   MPI_Comm              comm;
2786   PetscMPIInt           size;
2787   PetscScalar           *cv, *sendbuf, *recvbuf;
2788   const PetscScalar     *av,*bv;
2789   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2790   PetscScalar           _DOne=1.0,_DZero=0.0;
2791   PetscBLASInt          cm, cn, ck, alda, clda;
2792 
2793   PetscFunctionBegin;
2794   MatCheckProduct(C,3);
2795   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2796   abt  = (Mat_MatTransMultDense*)C->product->data;
2797   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2798   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2799   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
2800   ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
2801   ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
2802   ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
2803   ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
2804   ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr);
2805   ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
2806   ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
2807   /* copy transpose of B into buf[0] */
2808   bn      = B->rmap->n;
2809   sendbuf = abt->buf[0];
2810   recvbuf = abt->buf[1];
2811   for (k = 0, j = 0; j < bn; j++) {
2812     for (i = 0; i < cK; i++, k++) {
2813       sendbuf[k] = bv[i * blda + j];
2814     }
2815   }
2816   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2817   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
2818   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2819   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2820   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
2821   if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2822   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
2823   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2824   ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
2825   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2826   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2827   PetscFunctionReturn(0);
2828 }
2829 
2830 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2831 {
2832   Mat_MatTransMultDense *abt;
2833   PetscErrorCode        ierr;
2834 
2835   PetscFunctionBegin;
2836   MatCheckProduct(C,3);
2837   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2838   abt = (Mat_MatTransMultDense*)C->product->data;
2839   switch (abt->alg) {
2840   case 1:
2841     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2842     break;
2843   default:
2844     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2845     break;
2846   }
2847   PetscFunctionReturn(0);
2848 }
2849 
2850 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2851 {
2852   PetscErrorCode   ierr;
2853   Mat_MatMultDense *ab = (Mat_MatMultDense*)data;
2854 
2855   PetscFunctionBegin;
2856   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2857   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2858   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2859   ierr = PetscFree(ab);CHKERRQ(ierr);
2860   PetscFunctionReturn(0);
2861 }
2862 
2863 #if defined(PETSC_HAVE_ELEMENTAL)
2864 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2865 {
2866   PetscErrorCode   ierr;
2867   Mat_MatMultDense *ab;
2868 
2869   PetscFunctionBegin;
2870   MatCheckProduct(C,3);
2871   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
2872   ab   = (Mat_MatMultDense*)C->product->data;
2873   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
2874   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
2875   ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
2876   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
2877   PetscFunctionReturn(0);
2878 }
2879 
2880 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2881 {
2882   PetscErrorCode   ierr;
2883   Mat              Ae,Be,Ce;
2884   Mat_MatMultDense *ab;
2885 
2886   PetscFunctionBegin;
2887   MatCheckProduct(C,4);
2888   if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2889   /* check local size of A and B */
2890   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2891     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2892   }
2893 
2894   /* create elemental matrices Ae and Be */
2895   ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr);
2896   ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2897   ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr);
2898   ierr = MatSetUp(Ae);CHKERRQ(ierr);
2899   ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2900 
2901   ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr);
2902   ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
2903   ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
2904   ierr = MatSetUp(Be);CHKERRQ(ierr);
2905   ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2906 
2907   /* compute symbolic Ce = Ae*Be */
2908   ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr);
2909   ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr);
2910 
2911   /* setup C */
2912   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2913   ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr);
2914   ierr = MatSetUp(C);CHKERRQ(ierr);
2915 
2916   /* create data structure for reuse Cdense */
2917   ierr = PetscNew(&ab);CHKERRQ(ierr);
2918   ab->Ae = Ae;
2919   ab->Be = Be;
2920   ab->Ce = Ce;
2921 
2922   C->product->data    = ab;
2923   C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
2924   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2925   PetscFunctionReturn(0);
2926 }
2927 #endif
2928 /* ----------------------------------------------- */
2929 #if defined(PETSC_HAVE_ELEMENTAL)
2930 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2931 {
2932   PetscFunctionBegin;
2933   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2934   C->ops->productsymbolic = MatProductSymbolic_AB;
2935   PetscFunctionReturn(0);
2936 }
2937 #endif
2938 
2939 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2940 {
2941   Mat_Product *product = C->product;
2942   Mat         A = product->A,B=product->B;
2943 
2944   PetscFunctionBegin;
2945   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2946     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2947   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2948   C->ops->productsymbolic = MatProductSymbolic_AtB;
2949   PetscFunctionReturn(0);
2950 }
2951 
2952 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2953 {
2954   PetscErrorCode ierr;
2955   Mat_Product    *product = C->product;
2956   const char     *algTypes[2] = {"allgatherv","cyclic"};
2957   PetscInt       alg,nalg = 2;
2958   PetscBool      flg = PETSC_FALSE;
2959 
2960   PetscFunctionBegin;
2961   /* Set default algorithm */
2962   alg = 0; /* default is allgatherv */
2963   ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
2964   if (flg) {
2965     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
2966   }
2967 
2968   /* Get runtime option */
2969   if (product->api_user) {
2970     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
2971     ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
2972     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2973   } else {
2974     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
2975     ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
2976     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2977   }
2978   if (flg) {
2979     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
2980   }
2981 
2982   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2983   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2984   PetscFunctionReturn(0);
2985 }
2986 
2987 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2988 {
2989   PetscErrorCode ierr;
2990   Mat_Product    *product = C->product;
2991 
2992   PetscFunctionBegin;
2993   switch (product->type) {
2994 #if defined(PETSC_HAVE_ELEMENTAL)
2995   case MATPRODUCT_AB:
2996     ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
2997     break;
2998 #endif
2999   case MATPRODUCT_AtB:
3000     ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
3001     break;
3002   case MATPRODUCT_ABt:
3003     ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
3004     break;
3005   default:
3006     break;
3007   }
3008   PetscFunctionReturn(0);
3009 }
3010