xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision b5675b0fb40a1b9337ff4b50b8a320c50b3df377)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11 
12       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
13               matrix that represents the operator. For sequential matrices it returns itself.
14 
15     Input Parameter:
16 .      A - the Seq or MPI dense matrix
17 
18     Output Parameter:
19 .      B - the inner matrix
20 
21     Level: intermediate
22 
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25 {
26   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
27   PetscErrorCode ierr;
28   PetscBool      flg;
29 
30   PetscFunctionBegin;
31   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
32   PetscValidPointer(B,2);
33   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
34   if (flg) *B = mat->A;
35   else *B = A;
36   PetscFunctionReturn(0);
37 }
38 
39 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
40 {
41   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
42   PetscErrorCode ierr;
43   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
44 
45   PetscFunctionBegin;
46   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
47   lrow = row - rstart;
48   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 
52 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
53 {
54   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
55   PetscErrorCode ierr;
56   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
57 
58   PetscFunctionBegin;
59   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
60   lrow = row - rstart;
61   ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
62   PetscFunctionReturn(0);
63 }
64 
/* Returns (cached on the matrix as "DiagonalBlock") a sequential dense matrix whose
   entries alias the diagonal block of the parallel matrix.  Requires congruent row
   and column layouts (i.e. a square partitioning). */
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  /* Reuse a previously built block if one is already composed on A */
  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr);
    if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    /* B shares storage with the local block: the diagonal block starts at column
       rstart of the local (column-major) array, i.e. at offset m*rstart.
       NOTE(review): this assumes the local leading dimension equals m — confirm
       for matrices where MatDenseSetLDA() was used. */
    ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* Composing takes a reference, so the MatDestroy() below only drops our local
       reference; A retains ownership of the cached block. */
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
98 
/* Inserts/adds a logically dense m-by-n block of values.  Locally owned rows are
   forwarded to the sequential block immediately; off-process rows are stashed for
   communication during assembly (unless donotstash is set). */
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;  /* negative rows are silently skipped */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;   /* convert to local row index */
      if (roworiented) {
        /* v is row-major: row i of the block starts at v + i*n */
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* v is column-major: entry (i,j) of the block is v[i + j*m] */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;  /* negative columns are silently skipped */
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: queue it and mark the matrix unassembled */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
132 
133 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
134 {
135   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
136   PetscErrorCode ierr;
137   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
138 
139   PetscFunctionBegin;
140   for (i=0; i<m; i++) {
141     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
142     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
143     if (idxm[i] >= rstart && idxm[i] < rend) {
144       row = idxm[i] - rstart;
145       for (j=0; j<n; j++) {
146         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
147         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
148         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
149       }
150     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
156 {
157   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
158   PetscErrorCode ierr;
159 
160   PetscFunctionBegin;
161   ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
162   PetscFunctionReturn(0);
163 }
164 
/* Sets the leading dimension of the local dense block, creating the inner
   sequential matrix lazily if preallocation has not happened yet. */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->A) {
    /* Inner matrix not created yet: build it now so the LDA can be recorded.
       Local block is rmap->n by the FULL global column count cmap->N. */
    if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
    ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->A);CHKERRQ(ierr);
    ierr = MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
    /* Match the inner type to the outer type (CUDA vs host) */
    ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  }
  ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
185 
186 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
187 {
188   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
189   PetscErrorCode ierr;
190 
191   PetscFunctionBegin;
192   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
193   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
194   PetscFunctionReturn(0);
195 }
196 
197 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
198 {
199   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
200   PetscErrorCode ierr;
201 
202   PetscFunctionBegin;
203   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
204   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
205   PetscFunctionReturn(0);
206 }
207 
208 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
209 {
210   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
211   PetscErrorCode ierr;
212 
213   PetscFunctionBegin;
214   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
215   ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr);
216   PetscFunctionReturn(0);
217 }
218 
219 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
220 {
221   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
222   PetscErrorCode ierr;
223 
224   PetscFunctionBegin;
225   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
226   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
227   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
228   PetscFunctionReturn(0);
229 }
230 
231 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
232 {
233   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
234   PetscErrorCode ierr;
235 
236   PetscFunctionBegin;
237   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
238   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
239   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
240   PetscFunctionReturn(0);
241 }
242 
243 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
244 {
245   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
246   PetscErrorCode ierr;
247 
248   PetscFunctionBegin;
249   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
250   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
251   ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr);
252   PetscFunctionReturn(0);
253 }
254 
/* Extracts the submatrix A(isrow,iscol).  The column IS is gathered globally
   (every rank needs all selected columns since each rank stores full rows),
   while rows must already follow the parallel row partitioning of A. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
  PetscErrorCode    ierr;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  /* Every rank needs the complete list of selected columns */
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time:
     column icol[i] of A starts at v + lda*icol[i] in the column-major local
     array, and the selected rows are read relative to this rank's rstart.
     bv advances contiguously because the target is freshly packed. */
  newmatd = (Mat_MPIDense*)newmat->data;
  ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr);
  for (i=0; i<Ncols; i++) {
    const PetscScalar *av = v + lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }
  ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr);

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
324 
325 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
326 {
327   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
328   PetscErrorCode ierr;
329 
330   PetscFunctionBegin;
331   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
332   PetscFunctionReturn(0);
333 }
334 
335 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
336 {
337   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
338   PetscErrorCode ierr;
339 
340   PetscFunctionBegin;
341   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
342   PetscFunctionReturn(0);
343 }
344 
345 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
346 {
347   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
348   PetscErrorCode ierr;
349 
350   PetscFunctionBegin;
351   ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr);
352   PetscFunctionReturn(0);
353 }
354 
355 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
356 {
357   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
358   PetscErrorCode ierr;
359   PetscInt       nstash,reallocs;
360 
361   PetscFunctionBegin;
362   if (mdn->donotstash || mat->nooffprocentries) return(0);
363 
364   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
365   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
366   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
367   PetscFunctionReturn(0);
368 }
369 
/* Completes assembly: drains stash messages of off-process entries, inserts
   them locally, then assembles the inner sequential block and (on first final
   assembly) sets up the communication structures for MatMult. */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      /* flg == 0 signals that all stash messages have been consumed */
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  /* assemble the local (sequential) block */
  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* first final assembly: build lvec/Mvctx used by the MatMult kernels */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
408 
409 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
410 {
411   PetscErrorCode ierr;
412   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
413 
414   PetscFunctionBegin;
415   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
416   PetscFunctionReturn(0);
417 }
418 
419 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
420 {
421   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
422   PetscErrorCode    ierr;
423   PetscInt          i,len,*lrows;
424 
425   PetscFunctionBegin;
426   /* get locally owned rows */
427   ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr);
428   /* fix right hand side if needed */
429   if (x && b) {
430     const PetscScalar *xx;
431     PetscScalar       *bb;
432 
433     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
434     ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr);
435     for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
436     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
437     ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr);
438   }
439   ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
440   if (diag != 0.0) {
441     Vec d;
442 
443     ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
444     ierr = VecSet(d,diag);CHKERRQ(ierr);
445     ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr);
446     ierr = VecDestroy(&d);CHKERRQ(ierr);
447   }
448   ierr = PetscFree(lrows);CHKERRQ(ierr);
449   PetscFunctionReturn(0);
450 }
451 
452 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
453 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
454 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
455 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
456 
457 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
458 {
459   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
460   PetscErrorCode    ierr;
461   const PetscScalar *ax;
462   PetscScalar       *ay;
463 
464   PetscFunctionBegin;
465   ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
466   ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
467   ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
468   ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
469   ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
470   ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
471   ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
472   PetscFunctionReturn(0);
473 }
474 
475 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
476 {
477   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
478   PetscErrorCode    ierr;
479   const PetscScalar *ax;
480   PetscScalar       *ay;
481 
482   PetscFunctionBegin;
483   ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
484   ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
485   ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
486   ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
487   ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
488   ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
489   ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
490   PetscFunctionReturn(0);
491 }
492 
493 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
494 {
495   Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
496   PetscErrorCode    ierr;
497   const PetscScalar *ax;
498   PetscScalar       *ay;
499 
500   PetscFunctionBegin;
501   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
502   ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
503   ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
504   ierr = VecGetArrayInPlace(yy,&ay);CHKERRQ(ierr);
505   ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
506   ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
507   ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
508   ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr);
509   PetscFunctionReturn(0);
510 }
511 
512 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
513 {
514   Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
515   PetscErrorCode    ierr;
516   const PetscScalar *ax;
517   PetscScalar       *ay;
518 
519   PetscFunctionBegin;
520   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
521   ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
522   ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
523   ierr = VecGetArrayInPlace(zz,&ay);CHKERRQ(ierr);
524   ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
525   ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
526   ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
527   ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr);
528   PetscFunctionReturn(0);
529 }
530 
531 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
532 {
533   Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
534   PetscErrorCode    ierr;
535   PetscInt          lda,len,i,n,m = A->rmap->n,radd;
536   PetscScalar       *x,zero = 0.0;
537   const PetscScalar *av;
538 
539   PetscFunctionBegin;
540   ierr = VecSet(v,zero);CHKERRQ(ierr);
541   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
542   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
543   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
544   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
545   radd = A->rmap->rstart*m;
546   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
547   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
548   for (i=0; i<len; i++) {
549     x[i] = av[radd + i*lda + i];
550   }
551   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
552   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
553   PetscFunctionReturn(0);
554 }
555 
/* Destroys the parallel dense matrix: releases the inner objects, frees the
   per-type data, and clears every composed method/conversion hook so the Mat
   can be re-typed cleanly. */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  /* refuse to destroy while a column-vector or submatrix view is outstanding */
  if (mdn->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (mdn->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  /* inner objects: local block, work vector, SF, cached column vec/submatrix */
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  /* clear every composed function so stale pointers cannot be invoked */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
#if defined (PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
613 
614 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
615 
616 #include <petscdraw.h>
/* Views the matrix on ASCII, draw, or socket viewers.  Info formats print
   per-rank statistics; otherwise the full matrix is gathered to rank 0 and
   viewed via the sequential dense viewer. */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank statistics only; no entries are printed */
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    /* a null draw context means nothing should be rendered */
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    /* rank 0 owns all M rows of the temporary; other ranks own none */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* only rank 0 holds data; view its local block through a sub-viewer */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
699 
700 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
701 {
702   PetscErrorCode ierr;
703   PetscBool      iascii,isbinary,isdraw,issocket;
704 
705   PetscFunctionBegin;
706   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
707   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
708   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
709   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
710 
711   if (iascii || issocket || isdraw) {
712     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
713   } else if (isbinary) {
714     ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
715   }
716   PetscFunctionReturn(0);
717 }
718 
719 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
720 {
721   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
722   Mat            mdn  = mat->A;
723   PetscErrorCode ierr;
724   PetscLogDouble isend[5],irecv[5];
725 
726   PetscFunctionBegin;
727   info->block_size = 1.0;
728 
729   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
730 
731   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
732   isend[3] = info->memory;  isend[4] = info->mallocs;
733   if (flag == MAT_LOCAL) {
734     info->nz_used      = isend[0];
735     info->nz_allocated = isend[1];
736     info->nz_unneeded  = isend[2];
737     info->memory       = isend[3];
738     info->mallocs      = isend[4];
739   } else if (flag == MAT_GLOBAL_MAX) {
740     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
741 
742     info->nz_used      = irecv[0];
743     info->nz_allocated = irecv[1];
744     info->nz_unneeded  = irecv[2];
745     info->memory       = irecv[3];
746     info->mallocs      = irecv[4];
747   } else if (flag == MAT_GLOBAL_SUM) {
748     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
749 
750     info->nz_used      = irecv[0];
751     info->nz_allocated = irecv[1];
752     info->nz_unneeded  = irecv[2];
753     info->memory       = irecv[3];
754     info->mallocs      = irecv[4];
755   }
756   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
757   info->fill_ratio_needed = 0;
758   info->factor_mallocs    = 0;
759   PetscFunctionReturn(0);
760 }
761 
762 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
763 {
764   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
765   PetscErrorCode ierr;
766 
767   PetscFunctionBegin;
768   switch (op) {
769   case MAT_NEW_NONZERO_LOCATIONS:
770   case MAT_NEW_NONZERO_LOCATION_ERR:
771   case MAT_NEW_NONZERO_ALLOCATION_ERR:
772     MatCheckPreallocated(A,1);
773     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
774     break;
775   case MAT_ROW_ORIENTED:
776     MatCheckPreallocated(A,1);
777     a->roworiented = flg;
778     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
779     break;
780   case MAT_NEW_DIAGONALS:
781   case MAT_KEEP_NONZERO_PATTERN:
782   case MAT_USE_HASH_TABLE:
783   case MAT_SORTED_FULL:
784     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
785     break;
786   case MAT_IGNORE_OFF_PROC_ENTRIES:
787     a->donotstash = flg;
788     break;
789   case MAT_SYMMETRIC:
790   case MAT_STRUCTURALLY_SYMMETRIC:
791   case MAT_HERMITIAN:
792   case MAT_SYMMETRY_ETERNAL:
793   case MAT_IGNORE_LOWER_TRIANGULAR:
794   case MAT_IGNORE_ZERO_ENTRIES:
795     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
796     break;
797   default:
798     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
799   }
800   PetscFunctionReturn(0);
801 }
802 
803 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
804 {
805   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
806   const PetscScalar *l;
807   PetscScalar       x,*v,*vv,*r;
808   PetscErrorCode    ierr;
809   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;
810 
811   PetscFunctionBegin;
812   ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
813   ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
814   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
815   if (ll) {
816     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
817     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2);
818     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
819     for (i=0; i<m; i++) {
820       x = l[i];
821       v = vv + i;
822       for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
823     }
824     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
825     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
826   }
827   if (rr) {
828     const PetscScalar *ar;
829 
830     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
831     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
832     ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
833     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
834     ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
835     ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
836     ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr);
837     for (i=0; i<n; i++) {
838       x = r[i];
839       v = vv + i*lda;
840       for (j=0; j<m; j++) (*v++) *= x;
841     }
842     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
843     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
844   }
845   ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr);
846   PetscFunctionReturn(0);
847 }
848 
/*
   MatNorm_MPIDense - Computes a norm of a parallel dense matrix.

   Supported types:
     NORM_FROBENIUS - locally sum |a_ij|^2, MPI-sum across ranks, then sqrt;
     NORM_1         - max column sum: per-rank column sums are MPI-summed,
                      then the maximum over all global columns is taken;
     NORM_INFINITY  - max row norm: the local row norm is MPI-maxed.
   The two-norm is not supported in parallel.

   NOTE(review): the Frobenius and 1-norm loops walk cmap->n*rmap->n
   contiguous entries of the raw array, i.e. they assume the leading
   dimension of the inner matrix equals its local row count (no padding)
   -- confirm this holds for every path that creates mdn->A.
*/
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,j;
  PetscMPIInt       size;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr);
  v    = av;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  if (size == 1) {
    /* uniprocessor case: the sequential kernel handles all norm types */
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      /* global Frobenius norm = sqrt of the summed local squares */
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      /* tmp[j] accumulates this rank's contribution to column j's abs sum;
         the inner matrix stores all N columns, so j indexes global columns */
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = av;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
897 
898 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
899 {
900   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
901   Mat            B;
902   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
903   PetscErrorCode ierr;
904   PetscInt       j,i,lda;
905   PetscScalar    *v;
906 
907   PetscFunctionBegin;
908   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
909     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
910     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
911     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
912     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
913   } else B = *matout;
914 
915   m    = a->A->rmap->n; n = a->A->cmap->n;
916   ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
917   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
918   ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
919   for (i=0; i<m; i++) rwork[i] = rstart + i;
920   for (j=0; j<n; j++) {
921     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
922     v   += lda;
923   }
924   ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
925   ierr = PetscFree(rwork);CHKERRQ(ierr);
926   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
927   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
928   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
929     *matout = B;
930   } else {
931     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
932   }
933   PetscFunctionReturn(0);
934 }
935 
936 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
937 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
938 
939 PetscErrorCode MatSetUp_MPIDense(Mat A)
940 {
941   PetscErrorCode ierr;
942 
943   PetscFunctionBegin;
944   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
945   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
946   if (!A->preallocated) {
947     ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
948   }
949   PetscFunctionReturn(0);
950 }
951 
952 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
953 {
954   PetscErrorCode ierr;
955   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
956 
957   PetscFunctionBegin;
958   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
959   PetscFunctionReturn(0);
960 }
961 
962 PetscErrorCode MatConjugate_MPIDense(Mat mat)
963 {
964   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
965   PetscErrorCode ierr;
966 
967   PetscFunctionBegin;
968   ierr = MatConjugate(a->A);CHKERRQ(ierr);
969   PetscFunctionReturn(0);
970 }
971 
972 PetscErrorCode MatRealPart_MPIDense(Mat A)
973 {
974   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
975   PetscErrorCode ierr;
976 
977   PetscFunctionBegin;
978   ierr = MatRealPart(a->A);CHKERRQ(ierr);
979   PetscFunctionReturn(0);
980 }
981 
982 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
983 {
984   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
985   PetscErrorCode ierr;
986 
987   PetscFunctionBegin;
988   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
989   PetscFunctionReturn(0);
990 }
991 
992 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
993 {
994   PetscErrorCode ierr;
995   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
996 
997   PetscFunctionBegin;
998   if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
999   if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
1000   ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
1001   PetscFunctionReturn(0);
1002 }
1003 
1004 PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1005 
1006 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1007 {
1008   PetscErrorCode ierr;
1009   PetscInt       i,n;
1010   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1011   PetscReal      *work;
1012 
1013   PetscFunctionBegin;
1014   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1015   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1016   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1017   if (type == NORM_2) {
1018     for (i=0; i<n; i++) work[i] *= work[i];
1019   }
1020   if (type == NORM_INFINITY) {
1021     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1022   } else {
1023     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1024   }
1025   ierr = PetscFree(work);CHKERRQ(ierr);
1026   if (type == NORM_2) {
1027     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1028   }
1029   PetscFunctionReturn(0);
1030 }
1031 
1032 #if defined(PETSC_HAVE_CUDA)
1033 static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1034 {
1035   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1036   PetscErrorCode ierr;
1037   PetscInt       lda;
1038 
1039   PetscFunctionBegin;
1040   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1041   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1042   if (!a->cvec) {
1043     ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1044     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
1045   }
1046   a->vecinuse = col + 1;
1047   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1048   ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1049   ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1050   *v   = a->cvec;
1051   PetscFunctionReturn(0);
1052 }
1053 
1054 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1055 {
1056   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1057   PetscErrorCode ierr;
1058 
1059   PetscFunctionBegin;
1060   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1061   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1062   a->vecinuse = 0;
1063   ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1064   ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
1065   *v   = NULL;
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1070 {
1071   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1072   PetscInt       lda;
1073   PetscErrorCode ierr;
1074 
1075   PetscFunctionBegin;
1076   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1077   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1078   if (!a->cvec) {
1079     ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1080     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
1081   }
1082   a->vecinuse = col + 1;
1083   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1084   ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1085   ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1086   ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
1087   *v   = a->cvec;
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1092 {
1093   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1094   PetscErrorCode ierr;
1095 
1096   PetscFunctionBegin;
1097   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1098   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1099   a->vecinuse = 0;
1100   ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1101   ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
1102   ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
1103   *v   = NULL;
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1108 {
1109   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1110   PetscErrorCode ierr;
1111   PetscInt       lda;
1112 
1113   PetscFunctionBegin;
1114   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1115   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1116   if (!a->cvec) {
1117     ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1118     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
1119   }
1120   a->vecinuse = col + 1;
1121   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1122   ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1123   ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1124   *v   = a->cvec;
1125   PetscFunctionReturn(0);
1126 }
1127 
1128 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1129 {
1130   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1131   PetscErrorCode ierr;
1132 
1133   PetscFunctionBegin;
1134   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1135   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1136   a->vecinuse = 0;
1137   ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1138   ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
1139   *v   = NULL;
1140   PetscFunctionReturn(0);
1141 }
1142 
1143 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1144 {
1145   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1146   PetscErrorCode ierr;
1147 
1148   PetscFunctionBegin;
1149   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1150   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1151   ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
1152   PetscFunctionReturn(0);
1153 }
1154 
1155 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1156 {
1157   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1158   PetscErrorCode ierr;
1159 
1160   PetscFunctionBegin;
1161   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1162   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1163   ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
1164   PetscFunctionReturn(0);
1165 }
1166 
1167 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1168 {
1169   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1170   PetscErrorCode ierr;
1171 
1172   PetscFunctionBegin;
1173   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1174   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1175   ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr);
1176   PetscFunctionReturn(0);
1177 }
1178 
1179 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1180 {
1181   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1182   PetscErrorCode ierr;
1183 
1184   PetscFunctionBegin;
1185   ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
1186   PetscFunctionReturn(0);
1187 }
1188 
1189 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1190 {
1191   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1192   PetscErrorCode ierr;
1193 
1194   PetscFunctionBegin;
1195   ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
1196   PetscFunctionReturn(0);
1197 }
1198 
1199 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1200 {
1201   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1202   PetscErrorCode ierr;
1203 
1204   PetscFunctionBegin;
1205   ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
1206   PetscFunctionReturn(0);
1207 }
1208 
1209 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1210 {
1211   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1212   PetscErrorCode ierr;
1213 
1214   PetscFunctionBegin;
1215   ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
1216   PetscFunctionReturn(0);
1217 }
1218 
1219 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1220 {
1221   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1222   PetscErrorCode ierr;
1223 
1224   PetscFunctionBegin;
1225   ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
1226   PetscFunctionReturn(0);
1227 }
1228 
1229 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1230 {
1231   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1232   PetscErrorCode ierr;
1233 
1234   PetscFunctionBegin;
1235   ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
1236   PetscFunctionReturn(0);
1237 }
1238 
1239 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1240 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1241 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
1242 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1243 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1244 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
1245 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);
1246 
/*
   MatBindToCPU_MPIDenseCUDA - Pins computation to the CPU (bind=PETSC_TRUE)
   or allows it on the GPU (bind=PETSC_FALSE).

   Binds the inner sequential matrix, records the flag on the parallel
   matrix, then swaps the composed column-vector access functions between the
   CUDA and the plain MPIDense implementations.  When unbinding, any cached
   column vector / submatrix of the wrong (non-CUDA) type is destroyed so it
   will be recreated with the correct type on next use.
*/
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* cannot change binding while a column vector or submatrix is checked out */
  if (d->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (d->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* drop cached helpers of the wrong type; they are lazily recreated */
    ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
    }
    ierr = PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = MatDestroy(&d->cmat);CHKERRQ(ierr);
    }
    /* install the CUDA column-vector accessors */
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
  } else {
    /* install the host (plain MPIDense) column-vector accessors */
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  }
  if (d->cmat) {
    ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1289 
1290 PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
1291 {
1292   Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
1293   PetscErrorCode ierr;
1294   PetscBool      iscuda;
1295 
1296   PetscFunctionBegin;
1297   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1298   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
1299   if (!iscuda) PetscFunctionReturn(0);
1300   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
1301   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
1302   if (!d->A) {
1303     ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
1304     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
1305     ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
1306   }
1307   ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
1308   ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
1309   A->preallocated = PETSC_TRUE;
1310   PetscFunctionReturn(0);
1311 }
1312 #endif
1313 
1314 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1315 {
1316   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1317   PetscErrorCode ierr;
1318 
1319   PetscFunctionBegin;
1320   ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
1321   PetscFunctionReturn(0);
1322 }
1323 
/* A dense matrix stores every entry, so a diagonal entry is never missing;
   the optional output index d is intentionally left untouched. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1330 
1331 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1332 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1333 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1334 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1335 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
1336 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);
1337 
1338 /* -------------------------------------------------------------------*/
1339 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1340                                         MatGetRow_MPIDense,
1341                                         MatRestoreRow_MPIDense,
1342                                         MatMult_MPIDense,
1343                                 /*  4*/ MatMultAdd_MPIDense,
1344                                         MatMultTranspose_MPIDense,
1345                                         MatMultTransposeAdd_MPIDense,
1346                                         0,
1347                                         0,
1348                                         0,
1349                                 /* 10*/ 0,
1350                                         0,
1351                                         0,
1352                                         0,
1353                                         MatTranspose_MPIDense,
1354                                 /* 15*/ MatGetInfo_MPIDense,
1355                                         MatEqual_MPIDense,
1356                                         MatGetDiagonal_MPIDense,
1357                                         MatDiagonalScale_MPIDense,
1358                                         MatNorm_MPIDense,
1359                                 /* 20*/ MatAssemblyBegin_MPIDense,
1360                                         MatAssemblyEnd_MPIDense,
1361                                         MatSetOption_MPIDense,
1362                                         MatZeroEntries_MPIDense,
1363                                 /* 24*/ MatZeroRows_MPIDense,
1364                                         0,
1365                                         0,
1366                                         0,
1367                                         0,
1368                                 /* 29*/ MatSetUp_MPIDense,
1369                                         0,
1370                                         0,
1371                                         MatGetDiagonalBlock_MPIDense,
1372                                         0,
1373                                 /* 34*/ MatDuplicate_MPIDense,
1374                                         0,
1375                                         0,
1376                                         0,
1377                                         0,
1378                                 /* 39*/ MatAXPY_MPIDense,
1379                                         MatCreateSubMatrices_MPIDense,
1380                                         0,
1381                                         MatGetValues_MPIDense,
1382                                         0,
1383                                 /* 44*/ 0,
1384                                         MatScale_MPIDense,
1385                                         MatShift_Basic,
1386                                         0,
1387                                         0,
1388                                 /* 49*/ MatSetRandom_MPIDense,
1389                                         0,
1390                                         0,
1391                                         0,
1392                                         0,
1393                                 /* 54*/ 0,
1394                                         0,
1395                                         0,
1396                                         0,
1397                                         0,
1398                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1399                                         MatDestroy_MPIDense,
1400                                         MatView_MPIDense,
1401                                         0,
1402                                         0,
1403                                 /* 64*/ 0,
1404                                         0,
1405                                         0,
1406                                         0,
1407                                         0,
1408                                 /* 69*/ 0,
1409                                         0,
1410                                         0,
1411                                         0,
1412                                         0,
1413                                 /* 74*/ 0,
1414                                         0,
1415                                         0,
1416                                         0,
1417                                         0,
1418                                 /* 79*/ 0,
1419                                         0,
1420                                         0,
1421                                         0,
1422                                 /* 83*/ MatLoad_MPIDense,
1423                                         0,
1424                                         0,
1425                                         0,
1426                                         0,
1427                                         0,
1428                                 /* 89*/ 0,
1429                                         0,
1430                                         0,
1431                                         0,
1432                                         0,
1433                                 /* 94*/ 0,
1434                                         0,
1435                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1436                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1437                                         0,
1438                                 /* 99*/ MatProductSetFromOptions_MPIDense,
1439                                         0,
1440                                         0,
1441                                         MatConjugate_MPIDense,
1442                                         0,
1443                                 /*104*/ 0,
1444                                         MatRealPart_MPIDense,
1445                                         MatImaginaryPart_MPIDense,
1446                                         0,
1447                                         0,
1448                                 /*109*/ 0,
1449                                         0,
1450                                         0,
1451                                         MatGetColumnVector_MPIDense,
1452                                         MatMissingDiagonal_MPIDense,
1453                                 /*114*/ 0,
1454                                         0,
1455                                         0,
1456                                         0,
1457                                         0,
1458                                 /*119*/ 0,
1459                                         0,
1460                                         0,
1461                                         0,
1462                                         0,
1463                                 /*124*/ 0,
1464                                         MatGetColumnNorms_MPIDense,
1465                                         0,
1466                                         0,
1467                                         0,
1468                                 /*129*/ 0,
1469                                         0,
1470                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1471                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1472                                         0,
1473                                 /*134*/ 0,
1474                                         0,
1475                                         0,
1476                                         0,
1477                                         0,
1478                                 /*139*/ 0,
1479                                         0,
1480                                         0,
1481                                         0,
1482                                         0,
1483                                         MatCreateMPIMatConcatenateSeqMat_MPIDense,
1484                                 /*145*/ 0,
1485                                         0,
1486                                         0
1487 };
1488 
1489 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1490 {
1491   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1492   PetscBool      iscuda;
1493   PetscErrorCode ierr;
1494 
1495   PetscFunctionBegin;
1496   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1497   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1498   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1499   if (!a->A) {
1500     ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1501     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1502     ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1503   }
1504   ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
1505   ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
1506   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1507   mat->preallocated = PETSC_TRUE;
1508   PetscFunctionReturn(0);
1509 }
1510 
#if defined(PETSC_HAVE_ELEMENTAL)
/*
   MatConvert_MPIDense_Elemental - converts an MPIDENSE matrix to MATELEMENTAL.

   Each process inserts its m local rows (all N global columns) into the
   Elemental matrix. A is only read, so the read-only array accessor is used:
   this avoids bumping A's object state (and, for GPU matrices, invalidating
   its device copy) during a conversion that should leave A untouched.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat               mat_elemental;
  PetscErrorCode    ierr;
  const PetscScalar *v;
  PetscInt          m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    /* the local array is column-major, so insert column-oriented */
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  /* all global column indices, plus this process's owned global row indices */
  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArrayRead(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif
1551 
1552 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1553 {
1554   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1555   PetscErrorCode ierr;
1556 
1557   PetscFunctionBegin;
1558   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1559   PetscFunctionReturn(0);
1560 }
1561 
1562 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1563 {
1564   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1565   PetscErrorCode ierr;
1566 
1567   PetscFunctionBegin;
1568   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1569   PetscFunctionReturn(0);
1570 }
1571 
1572 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1573 {
1574   PetscErrorCode ierr;
1575   Mat_MPIDense   *mat;
1576   PetscInt       m,nloc,N;
1577 
1578   PetscFunctionBegin;
1579   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
1580   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
1581   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1582     PetscInt sum;
1583 
1584     if (n == PETSC_DECIDE) {
1585       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1586     }
1587     /* Check sum(n) = N */
1588     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1589     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1590 
1591     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
1592   }
1593 
1594   /* numeric phase */
1595   mat = (Mat_MPIDense*)(*outmat)->data;
1596   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1597   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1598   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1599   PetscFunctionReturn(0);
1600 }
1601 
#if defined(PETSC_HAVE_CUDA)
/* Convert a MATMPIDENSECUDA matrix to host MATMPIDENSE: move data/operations
   to the CPU, restore the standard vector type, detach all CUDA-specific
   composed methods, and convert the inner sequential block to MATSEQDENSE. */
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* MAT_INPLACE_MATRIX: convert *newmat (== M) in place */

  B    = *newmat;
  /* make sure all data lives on, and all operations run on, the host */
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
  /* uncompose (NULL removes) the CUDA-only methods registered by the CUDA type */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  /* convert the inner sequential block to the host type and rebuild the
     scatter used by MatMult (the inner matrix object may have changed) */
  if (m->A) {
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}
1644 
/* Convert a host MATMPIDENSE matrix to MATMPIDENSECUDA: switch the default
   vector type to CUDA, compose the CUDA-specific method implementations,
   convert the inner sequential block to MATSEQDENSECUDA, and unbind from
   the CPU. */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* MAT_INPLACE_MATRIX: convert *newmat (== M) in place */

  B    = *newmat;
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  /* register the CUDA-specific implementations looked up by the generic API */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* convert the inner block and rebuild the MatMult scatter; values were
       just copied to the device, so host and device copies agree */
    ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
#endif
1690 
1691 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1692 {
1693   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1694   PetscErrorCode ierr;
1695   PetscInt       lda;
1696 
1697   PetscFunctionBegin;
1698   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1699   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1700   if (!a->cvec) {
1701     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1702   }
1703   a->vecinuse = col + 1;
1704   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1705   ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1706   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1707   *v   = a->cvec;
1708   PetscFunctionReturn(0);
1709 }
1710 
1711 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1712 {
1713   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1714   PetscErrorCode ierr;
1715 
1716   PetscFunctionBegin;
1717   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1718   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1719   a->vecinuse = 0;
1720   ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1721   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1722   *v   = NULL;
1723   PetscFunctionReturn(0);
1724 }
1725 
1726 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1727 {
1728   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1729   PetscErrorCode ierr;
1730   PetscInt       lda;
1731 
1732   PetscFunctionBegin;
1733   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1734   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1735   if (!a->cvec) {
1736     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1737   }
1738   a->vecinuse = col + 1;
1739   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1740   ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1741   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1742   ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
1743   *v   = a->cvec;
1744   PetscFunctionReturn(0);
1745 }
1746 
1747 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1748 {
1749   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1750   PetscErrorCode ierr;
1751 
1752   PetscFunctionBegin;
1753   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1754   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1755   a->vecinuse = 0;
1756   ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1757   ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
1758   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1759   *v   = NULL;
1760   PetscFunctionReturn(0);
1761 }
1762 
1763 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1764 {
1765   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1766   PetscErrorCode ierr;
1767   PetscInt       lda;
1768 
1769   PetscFunctionBegin;
1770   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1771   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1772   if (!a->cvec) {
1773     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1774   }
1775   a->vecinuse = col + 1;
1776   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1777   ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1778   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1779   *v   = a->cvec;
1780   PetscFunctionReturn(0);
1781 }
1782 
1783 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1784 {
1785   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1786   PetscErrorCode ierr;
1787 
1788   PetscFunctionBegin;
1789   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1790   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1791   a->vecinuse = 0;
1792   ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1793   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1794   *v   = NULL;
1795   PetscFunctionReturn(0);
1796 }
1797 
/* Expose columns [cbegin,cend) of A as a (cached) MPIDense matrix sharing A's
   storage; must be matched by MatDenseRestoreSubMatrix(). Only one submatrix
   (and no column vector) may be checked out at a time. */
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscBool      setup = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (a->vecinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cmat) {
    /* first use: create the wrapper matrix sharing A's row layout, with a
       column layout sized to the requested column range */
    setup = PETSC_TRUE;
    ierr = MatCreate(comm,&a->cmat);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr);
    ierr = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  } else if (cend-cbegin != a->cmat->cmap->N) {
    /* cached wrapper has a different column count: rebuild its column layout */
    setup = PETSC_TRUE;
    ierr = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  }
  c = (Mat_MPIDense*)a->cmat->data;
  if (c->A) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  /* borrow the sequential submatrix of the local block as the wrapper's inner matrix */
  ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr);
  if (setup) { /* do we really need this? */
    ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr);
  }
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled = PETSC_TRUE;
  a->matinuse = cbegin + 1; /* offset by one so zero means "free" */
  *v = a->cmat;
  PetscFunctionReturn(0);
}
1837 
1838 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
1839 {
1840   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1841   Mat_MPIDense   *c;
1842   PetscErrorCode ierr;
1843 
1844   PetscFunctionBegin;
1845   if (!a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
1846   if (!a->cmat) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
1847   if (*v != a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
1848   a->matinuse = 0;
1849   c    = (Mat_MPIDense*)a->cmat->data;
1850   ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr);
1851   *v   = NULL;
1852   PetscFunctionReturn(0);
1853 }
1854 
/* Constructor for MATMPIDENSE: installs the function table, creates the
   stash used for off-process MatSetValues(), and composes the dense-specific
   methods and conversion/product hooks looked up through the generic API. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  /* dense-specific array/column/submatrix accessors */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr);
  /* conversions to optional external/GPU formats */
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  /* preallocation and MatProduct hooks */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#endif

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1918 
1919 /*MC
1920    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1921 
1922    Options Database Keys:
1923 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1924 
1925   Level: beginner
1926 
.seealso: MATMPIDENSE, MATSEQDENSECUDA, MATDENSECUDA
1928 
1929 M*/
#if defined(PETSC_HAVE_CUDA)
/* Constructor for MATMPIDENSECUDA: build a plain MPIDENSE matrix, then
   convert it in place to the CUDA type. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
1941 
1942 /*MC
1943    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1944 
1945    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1946    and MATMPIDENSE otherwise.
1947 
1948    Options Database Keys:
1949 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1950 
1951   Level: beginner
1952 
1953 
1954 .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
1955 M*/
1956 
1957 /*MC
1958    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
1959 
1960    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
1961    and MATMPIDENSECUDA otherwise.
1962 
1963    Options Database Keys:
1964 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
1965 
1966   Level: beginner
1967 
1968 .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
1969 M*/
1970 
1971 /*@C
1972    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1973 
1974    Collective
1975 
1976    Input Parameters:
+  B - the matrix
1978 -  data - optional location of matrix data.  Set data=NULL for PETSc
1979    to control all matrix memory allocation.
1980 
1981    Notes:
1982    The dense format is fully compatible with standard Fortran 77
1983    storage by columns.
1984 
1985    The data input variable is intended primarily for Fortran programmers
1986    who wish to allocate their own matrix memory space.  Most users should
1987    set data=NULL.
1988 
1989    Level: intermediate
1990 
1991 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1992 @*/
PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation, if the matrix type provides one */
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2002 
2003 /*@
2004    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
2005    array provided by the user. This is useful to avoid copying an array
2006    into a matrix
2007 
2008    Not Collective
2009 
2010    Input Parameters:
2011 +  mat - the matrix
2012 -  array - the array in column major order
2013 
2014    Notes:
2015    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
2016    freed when the matrix is destroyed.
2017 
2018    Level: developer
2019 
2020 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
2021 
2022 @*/
PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  /* the values visible through the matrix have (potentially) changed */
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the placed array is host memory, so the up-to-date copy is on the CPU */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
2036 
2037 /*@
2038    MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()
2039 
2040    Not Collective
2041 
2042    Input Parameters:
2043 .  mat - the matrix
2044 
2045    Notes:
2046    You can only call this after a call to MatDensePlaceArray()
2047 
2048    Level: developer
2049 
2050 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
2051 
2052 @*/
PetscErrorCode  MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  /* the matrix now shows the original array again, so its values changed */
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2063 
2064 /*@
2065    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2066    array provided by the user. This is useful to avoid copying an array
2067    into a matrix
2068 
2069    Not Collective
2070 
2071    Input Parameters:
2072 +  mat - the matrix
2073 -  array - the array in column major order
2074 
2075    Notes:
2076    The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
2077    freed by the user. It will be freed when the matrix is destroyed.
2078 
2079    Level: developer
2080 
2081 .seealso: MatDenseGetArray(), VecReplaceArray()
2082 @*/
PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  /* the values visible through the matrix have (potentially) changed */
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the replacement array is host memory, so the up-to-date copy is on the CPU */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
2096 
2097 #if defined(PETSC_HAVE_CUDA)
2098 /*@C
2099    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
2100    array provided by the user. This is useful to avoid copying an array
2101    into a matrix
2102 
2103    Not Collective
2104 
2105    Input Parameters:
2106 +  mat - the matrix
2107 -  array - the array in column major order
2108 
2109    Notes:
2110    You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
2111    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
2112 
2113    Level: developer
2114 
2115 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
2116 @*/
PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation; the user keeps ownership of array */
  ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* the placed device array is now the authoritative copy */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2128 
2129 /*@C
   MatDenseCUDAResetArray - Resets the matrix array to that which it previously had before the call to MatDenseCUDAPlaceArray()
2131 
2132    Not Collective
2133 
   Input Parameter:
.  mat - the matrix
2136 
2137    Notes:
2138    You can only call this after a call to MatDenseCUDAPlaceArray()
2139 
2140    Level: developer
2141 
2142 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()
2143 
2144 @*/
PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation to restore the original GPU array */
  ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2155 
2156 /*@C
2157    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
2158    array provided by the user. This is useful to avoid copying an array
2159    into a matrix
2160 
2161    Not Collective
2162 
2163    Input Parameters:
2164 +  mat - the matrix
2165 -  array - the array in column major order
2166 
2167    Notes:
2168    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2169    The memory passed in CANNOT be freed by the user. It will be freed
2170    when the matrix is destroyed. The array should respect the matrix leading dimension.
2171 
2172    Level: developer
2173 
2174 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
2175 @*/
PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation; the old GPU array is freed there */
  ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* the replacement device array is now the authoritative copy */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2187 
2188 /*@C
2189    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.
2190 
2191    Not Collective
2192 
   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order
2198 
2199    Notes:
2200    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
2201 
2202    Level: developer
2203 
2204 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
2205 @*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* write access: no host-to-device sync is performed by the implementation */
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2216 
2217 /*@C
2218    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().
2219 
2220    Not Collective
2221 
2222    Input Parameters:
2223 +  A - the matrix
2224 -  array - the GPU array in column major order
2225 
2226    Notes:
2227 
2228    Level: developer
2229 
2230 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2231 @*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  /* the caller wrote through the GPU pointer, so the device copy is now authoritative */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2243 
2244 /*@C
2245    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.
2246 
2247    Not Collective
2248 
   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order
2254 
2255    Notes:
2256    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2257 
2258    Level: developer
2259 
2260 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2261 @*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* read-only access: no PetscObjectStateIncrease(), the matrix is not modified */
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2271 
2272 /*@C
2273    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2274 
2275    Not Collective
2276 
2277    Input Parameters:
2278 +  A - the matrix
2279 -  array - the GPU array in column major order
2280 
2281    Notes:
2282    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2283 
2284    Level: developer
2285 
2286 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
2287 @*/
2288 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2289 {
2290   PetscErrorCode ierr;
2291 
2292   PetscFunctionBegin;
2293   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2294   PetscFunctionReturn(0);
2295 }
2296 
2297 /*@C
2298    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.
2299 
2300    Not Collective
2301 
   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order
2307 
2308    Notes:
2309    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().
2310 
2311    Level: developer
2312 
2313 .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2314 @*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* read-write access: the implementation may sync host data to the device first */
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2325 
2326 /*@C
2327    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().
2328 
2329    Not Collective
2330 
2331    Input Parameters:
2332 +  A - the matrix
2333 -  array - the GPU array in column major order
2334 
2335    Notes:
2336 
2337    Level: developer
2338 
2339 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2340 @*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  /* the caller may have written through the GPU pointer; mark the device copy authoritative */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2352 #endif
2353 
2354 /*@C
2355    MatCreateDense - Creates a matrix in dense format.
2356 
2357    Collective
2358 
2359    Input Parameters:
2360 +  comm - MPI communicator
2361 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2362 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2363 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2364 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2365 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2366    to control all matrix memory allocation.
2367 
2368    Output Parameter:
2369 .  A - the matrix
2370 
2371    Notes:
2372    The dense format is fully compatible with standard Fortran 77
2373    storage by columns.
2374 
2375    The data input variable is intended primarily for Fortran programmers
2376    who wish to allocate their own matrix memory space.  Most users should
2377    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
2378 
2379    The user MUST specify either the local or global matrix dimensions
2380    (possibly both).
2381 
2382    Level: intermediate
2383 
2384 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
2385 @*/
PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    /* ranks may disagree on whether they supplied data (e.g. a rank with no local rows passes NULL) */
    PetscBool havedata = (PetscBool)!!data;

    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    /* collective OR: if any rank supplied data, the matrix is considered filled */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr);
    if (havedata) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    /* uniprocessor case: use the sequential dense implementation directly */
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2411 
2412 #if defined(PETSC_HAVE_CUDA)
2413 /*@C
2414    MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.
2415 
2416    Collective
2417 
2418    Input Parameters:
2419 +  comm - MPI communicator
2420 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2421 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2422 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2423 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2424 -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
2425    to control matrix memory allocation.
2426 
2427    Output Parameter:
2428 .  A - the matrix
2429 
2430    Notes:
2431 
2432    Level: intermediate
2433 
2434 .seealso: MatCreate(), MatCreateDense()
2435 @*/
PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  /* unlike MatCreateDense(), all ranks must agree on whether data is provided */
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
    ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    /* uniprocessor case: use the sequential dense CUDA implementation */
    ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
    ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2459 #endif
2460 
2461 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
2462 {
2463   Mat            mat;
2464   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
2465   PetscErrorCode ierr;
2466 
2467   PetscFunctionBegin;
2468   *newmat = 0;
2469   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
2470   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2471   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
2472   a       = (Mat_MPIDense*)mat->data;
2473 
2474   mat->factortype   = A->factortype;
2475   mat->assembled    = PETSC_TRUE;
2476   mat->preallocated = PETSC_TRUE;
2477 
2478   mat->insertmode = NOT_SET_VALUES;
2479   a->donotstash   = oldmat->donotstash;
2480 
2481   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
2482   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
2483 
2484   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2485   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2486   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
2487 
2488   *newmat = mat;
2489   PetscFunctionReturn(0);
2490 }
2491 
/* Load an MPI dense matrix from a viewer; binary and (if available) HDF5 viewers are supported. */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}
2518 
2519 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
2520 {
2521   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2522   Mat            a,b;
2523   PetscBool      flg;
2524   PetscErrorCode ierr;
2525 
2526   PetscFunctionBegin;
2527   a    = matA->A;
2528   b    = matB->A;
2529   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2530   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2531   PetscFunctionReturn(0);
2532 }
2533 
2534 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2535 {
2536   PetscErrorCode        ierr;
2537   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2538 
2539   PetscFunctionBegin;
2540   ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
2541   ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
2542   ierr = PetscFree(atb);CHKERRQ(ierr);
2543   PetscFunctionReturn(0);
2544 }
2545 
2546 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2547 {
2548   PetscErrorCode        ierr;
2549   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2550 
2551   PetscFunctionBegin;
2552   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
2553   ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
2554   ierr = PetscFree(abt);CHKERRQ(ierr);
2555   PetscFunctionReturn(0);
2556 }
2557 
/* Numeric phase of C = A^T * B for MPI dense matrices: each rank forms the full
   local product aseq^T * bseq, then the global sum is distributed row-block-wise
   with MPI_Reduce_scatter. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts;
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  /* buffers were sized in the symbolic phase */
  atb = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf = atb->sendbuf;

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);

  /* arrange atbarray into sendbuf */
  /* pack destined for each rank contiguously: for rank `proc`, its row block for
     every column of C (column-major within the block) */
  ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);

  /* sum all atbarray to local values of C */
  ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2602 
/* Symbolic phase of C = A^T * B for MPI dense matrices: sets up C's sizes/type
   and allocates the reduce-scatter buffers reused by the numeric phase. */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool             cisdense;
  PetscInt              i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* A and B must have conforming row distributions since A^T*B contracts over rows */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product C */
  ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
  if (!cisdense) {
    /* inherit A's (possibly CUDA) dense type */
    ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
  }
  ierr = MatSetUp(C);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = C->rmap->N;
  /* sendbuf holds the full cM x cN local product; size_t cast guards against PetscInt overflow */
  ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  /* each rank receives its row block of C for all cN columns */
  for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2642 
/* Symbolic phase of C = A * B^T for MPI dense matrices: sets up C and allocates
   the buffers for either the "cyclic" (pipelined point-to-point) or the
   "allgatherv" algorithm, selected via the product's alg string. */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A and B */
  /* A*B^T contracts over columns, so the local column counts must match */
  if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);

  /* alg 0 = allgatherv (default), alg 1 = cyclic */
  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  alg  = flg ? 0 : 1;

  /* setup matrix product C */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  /* a dedicated tag keeps the cyclic sends of this product separate from other traffic */
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    /* double buffer sized for the largest row block of B that can arrive */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" */
    /* buf[0] holds the local B^T, buf[1] receives all of B^T gathered */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data    = abt;
  C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2695 
/* Numeric phase of C = A * B^T, "cyclic" algorithm: B's row blocks are circulated
   around the ranks with nonblocking point-to-point messages; at each of the `size`
   steps, communication of the next block is overlapped with a local GEMM against
   the block currently held. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=0, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  = (Mat_MatTransMultDense*)C->product->data;
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
  /* leading dimensions, converted to BLAS integer width */
  ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  if (blda == bn) {
    /* B's local block is contiguous; send it in place without copying */
    sendbuf = (PetscScalar*)bv;
  } else {
    /* compact B's local block (strip the leading-dimension padding) into buf[0] */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  if (size > 1) {
    /* ring: send to the previous rank, receive from the next */
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
      ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
    }

    /* local aseq * sendbuf^T */
    /* the result lands in the column block of C owned by rank `recvisfrom` */
    ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2781 
2782 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2783 {
2784   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2785   Mat_MatTransMultDense *abt;
2786   PetscErrorCode        ierr;
2787   MPI_Comm              comm;
2788   PetscMPIInt           size;
2789   PetscScalar           *cv, *sendbuf, *recvbuf;
2790   const PetscScalar     *av,*bv;
2791   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2792   PetscScalar           _DOne=1.0,_DZero=0.0;
2793   PetscBLASInt          cm, cn, ck, alda, clda;
2794 
2795   PetscFunctionBegin;
2796   MatCheckProduct(C,3);
2797   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2798   abt  = (Mat_MatTransMultDense*)C->product->data;
2799   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2800   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2801   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
2802   ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
2803   ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
2804   ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
2805   ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
2806   ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr);
2807   ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
2808   ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
2809   /* copy transpose of B into buf[0] */
2810   bn      = B->rmap->n;
2811   sendbuf = abt->buf[0];
2812   recvbuf = abt->buf[1];
2813   for (k = 0, j = 0; j < bn; j++) {
2814     for (i = 0; i < cK; i++, k++) {
2815       sendbuf[k] = bv[i * blda + j];
2816     }
2817   }
2818   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2819   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
2820   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2821   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2822   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
2823   if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2824   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
2825   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2826   ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
2827   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2828   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2829   PetscFunctionReturn(0);
2830 }
2831 
2832 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2833 {
2834   Mat_MatTransMultDense *abt;
2835   PetscErrorCode        ierr;
2836 
2837   PetscFunctionBegin;
2838   MatCheckProduct(C,3);
2839   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2840   abt = (Mat_MatTransMultDense*)C->product->data;
2841   switch (abt->alg) {
2842   case 1:
2843     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2844     break;
2845   default:
2846     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2847     break;
2848   }
2849   PetscFunctionReturn(0);
2850 }
2851 
2852 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2853 {
2854   PetscErrorCode   ierr;
2855   Mat_MatMultDense *ab = (Mat_MatMultDense*)data;
2856 
2857   PetscFunctionBegin;
2858   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2859   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2860   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2861   ierr = PetscFree(ab);CHKERRQ(ierr);
2862   PetscFunctionReturn(0);
2863 }
2864 
2865 #if defined(PETSC_HAVE_ELEMENTAL)
2866 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2867 {
2868   PetscErrorCode   ierr;
2869   Mat_MatMultDense *ab;
2870 
2871   PetscFunctionBegin;
2872   MatCheckProduct(C,3);
2873   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
2874   ab   = (Mat_MatMultDense*)C->product->data;
2875   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
2876   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
2877   ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
2878   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
2879   PetscFunctionReturn(0);
2880 }
2881 
2882 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2883 {
2884   PetscErrorCode   ierr;
2885   Mat              Ae,Be,Ce;
2886   Mat_MatMultDense *ab;
2887 
2888   PetscFunctionBegin;
2889   MatCheckProduct(C,4);
2890   if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2891   /* check local size of A and B */
2892   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2893     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2894   }
2895 
2896   /* create elemental matrices Ae and Be */
2897   ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr);
2898   ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2899   ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr);
2900   ierr = MatSetUp(Ae);CHKERRQ(ierr);
2901   ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2902 
2903   ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr);
2904   ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
2905   ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
2906   ierr = MatSetUp(Be);CHKERRQ(ierr);
2907   ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2908 
2909   /* compute symbolic Ce = Ae*Be */
2910   ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr);
2911   ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr);
2912 
2913   /* setup C */
2914   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2915   ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr);
2916   ierr = MatSetUp(C);CHKERRQ(ierr);
2917 
2918   /* create data structure for reuse Cdense */
2919   ierr = PetscNew(&ab);CHKERRQ(ierr);
2920   ab->Ae = Ae;
2921   ab->Be = Be;
2922   ab->Ce = Ce;
2923 
2924   C->product->data    = ab;
2925   C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
2926   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2927   PetscFunctionReturn(0);
2928 }
2929 #endif
2930 /* ----------------------------------------------- */
2931 #if defined(PETSC_HAVE_ELEMENTAL)
2932 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2933 {
2934   PetscFunctionBegin;
2935   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2936   C->ops->productsymbolic = MatProductSymbolic_AB;
2937   PetscFunctionReturn(0);
2938 }
2939 #endif
2940 
2941 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2942 {
2943   Mat_Product *product = C->product;
2944   Mat         A = product->A,B=product->B;
2945 
2946   PetscFunctionBegin;
2947   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2948     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2949   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2950   C->ops->productsymbolic = MatProductSymbolic_AtB;
2951   PetscFunctionReturn(0);
2952 }
2953 
2954 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2955 {
2956   PetscErrorCode ierr;
2957   Mat_Product    *product = C->product;
2958   const char     *algTypes[2] = {"allgatherv","cyclic"};
2959   PetscInt       alg,nalg = 2;
2960   PetscBool      flg = PETSC_FALSE;
2961 
2962   PetscFunctionBegin;
2963   /* Set default algorithm */
2964   alg = 0; /* default is allgatherv */
2965   ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
2966   if (flg) {
2967     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
2968   }
2969 
2970   /* Get runtime option */
2971   if (product->api_user) {
2972     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
2973     ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
2974     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2975   } else {
2976     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
2977     ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
2978     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2979   }
2980   if (flg) {
2981     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
2982   }
2983 
2984   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2985   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2986   PetscFunctionReturn(0);
2987 }
2988 
2989 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2990 {
2991   PetscErrorCode ierr;
2992   Mat_Product    *product = C->product;
2993 
2994   PetscFunctionBegin;
2995   switch (product->type) {
2996 #if defined(PETSC_HAVE_ELEMENTAL)
2997   case MATPRODUCT_AB:
2998     ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
2999     break;
3000 #endif
3001   case MATPRODUCT_AtB:
3002     ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
3003     break;
3004   case MATPRODUCT_ABt:
3005     ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
3006     break;
3007   default:
3008     break;
3009   }
3010   PetscFunctionReturn(0);
3011 }
3012