xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision cf0762375add12fa0f124e50db9eeb4edce3f335)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11 
12       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
13               matrix that represents the operator. For sequential matrices it returns itself.
14 
15     Input Parameter:
16 .      A - the Seq or MPI dense matrix
17 
18     Output Parameter:
19 .      B - the inner matrix
20 
21     Level: intermediate
22 
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25 {
26   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
27   PetscErrorCode ierr;
28   PetscBool      flg;
29 
30   PetscFunctionBegin;
31   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
32   PetscValidPointer(B,2);
33   ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
34   if (flg) *B = mat->A;
35   else {
36     ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg);CHKERRQ(ierr);
37     if (!flg) SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
38     *B = A;
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
44 {
45   Mat_MPIDense   *Amat = (Mat_MPIDense*)A->data;
46   Mat_MPIDense   *Bmat = (Mat_MPIDense*)B->data;
47   PetscErrorCode ierr;
48 
49   PetscFunctionBegin;
50   ierr = MatCopy(Amat->A,Bmat->A,s);CHKERRQ(ierr);
51   PetscFunctionReturn(0);
52 }
53 
54 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
55 {
56   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
57   PetscErrorCode ierr;
58   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
59 
60   PetscFunctionBegin;
61   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
62   lrow = row - rstart;
63   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
64   PetscFunctionReturn(0);
65 }
66 
67 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
68 {
69   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
70   PetscErrorCode ierr;
71   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
72 
73   PetscFunctionBegin;
74   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
75   lrow = row - rstart;
76   ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
77   PetscFunctionReturn(0);
78 }
79 
80 PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
81 {
82   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
83   PetscErrorCode ierr;
84   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
85   PetscScalar    *array;
86   MPI_Comm       comm;
87   PetscBool      flg;
88   Mat            B;
89 
90   PetscFunctionBegin;
91   ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr);
92   if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
93   ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
94   if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
95 
96     ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr);
97     if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
98     ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
99     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
100     ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
101     ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
102     ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
103     ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
104     ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
105     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
106     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
107     ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
108     *a   = B;
109     ierr = MatDestroy(&B);CHKERRQ(ierr);
110   } else *a = B;
111   PetscFunctionReturn(0);
112 }
113 
/* Insert or add values into the matrix. Locally owned rows are forwarded
   directly to the inner sequential matrix; off-process rows are queued in the
   stash and communicated during MatAssemblyBegin/End. */
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are silently ignored */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* row-oriented input: row i of v occupies v[i*n .. i*n+n-1] */
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* column-oriented input: entry (i,j) is v[i + j*m]; insert one at a time */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue; /* negative column indices are silently ignored */
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: stash it; the matrix is no longer assembled */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
147 
148 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
149 {
150   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
151   PetscErrorCode ierr;
152   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
153 
154   PetscFunctionBegin;
155   for (i=0; i<m; i++) {
156     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
157     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
158     if (idxm[i] >= rstart && idxm[i] < rend) {
159       row = idxm[i] - rstart;
160       for (j=0; j<n; j++) {
161         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
162         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
163         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
164       }
165     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
166   }
167   PetscFunctionReturn(0);
168 }
169 
170 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
171 {
172   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
173   PetscErrorCode ierr;
174 
175   PetscFunctionBegin;
176   ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
177   PetscFunctionReturn(0);
178 }
179 
/* Set the leading dimension of the local array. If the inner sequential
   matrix does not exist yet it is created here (so the LDA can be recorded
   before preallocation); its type mirrors the CUDA-ness of A. */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->A) {
    if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
    ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
    /* Local piece is rmap->n x cmap->N: each rank stores all columns of its rows */
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->A);CHKERRQ(ierr);
    ierr = MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
    ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  }
  ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
200 
201 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
202 {
203   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
204   PetscErrorCode ierr;
205 
206   PetscFunctionBegin;
207   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
208   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
209   PetscFunctionReturn(0);
210 }
211 
212 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
213 {
214   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
215   PetscErrorCode ierr;
216 
217   PetscFunctionBegin;
218   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
219   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
220   PetscFunctionReturn(0);
221 }
222 
223 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
224 {
225   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
226   PetscErrorCode ierr;
227 
228   PetscFunctionBegin;
229   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
230   ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr);
231   PetscFunctionReturn(0);
232 }
233 
234 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
235 {
236   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
237   PetscErrorCode ierr;
238 
239   PetscFunctionBegin;
240   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
241   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
242   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
243   PetscFunctionReturn(0);
244 }
245 
246 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
247 {
248   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
249   PetscErrorCode ierr;
250 
251   PetscFunctionBegin;
252   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
253   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
254   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
255   PetscFunctionReturn(0);
256 }
257 
258 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
259 {
260   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
261   PetscErrorCode ierr;
262 
263   PetscFunctionBegin;
264   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
265   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
266   ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr);
267   PetscFunctionReturn(0);
268 }
269 
/* Extract the submatrix A[isrow,iscol] into a new (or reused) parallel dense
   matrix. Rows must be locally owned (no parallel redistribution); the column
   IS is gathered onto every rank so each rank copies every requested column
   of its own rows. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
  PetscErrorCode    ierr;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  /* Every rank needs all requested column indices, since each rank stores all
     columns of its locally owned rows */
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  /* NOTE(review): bv is filled contiguously, i.e. the target's local storage
     is assumed to have lda == nrows — true for the freshly preallocated
     matrix; confirm this also holds on the MAT_REUSE_MATRIX path */
  newmatd = (Mat_MPIDense*)newmat->data;
  ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr);
  for (i=0; i<Ncols; i++) {
    /* av points at the start of source column icol[i] (column-major, stride lda) */
    const PetscScalar *av = v + lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }
  ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr);

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
339 
340 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
341 {
342   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
343   PetscErrorCode ierr;
344 
345   PetscFunctionBegin;
346   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
347   PetscFunctionReturn(0);
348 }
349 
350 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
351 {
352   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
353   PetscErrorCode ierr;
354 
355   PetscFunctionBegin;
356   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
357   PetscFunctionReturn(0);
358 }
359 
360 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
361 {
362   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
363   PetscErrorCode ierr;
364 
365   PetscFunctionBegin;
366   ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr);
367   PetscFunctionReturn(0);
368 }
369 
370 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
371 {
372   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
373   PetscErrorCode ierr;
374   PetscInt       nstash,reallocs;
375 
376   PetscFunctionBegin;
377   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
378 
379   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
380   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
381   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
382   PetscFunctionReturn(0);
383 }
384 
/* Finish assembly: drain the stash of incoming off-process entries, insert
   them locally, assemble the inner sequential matrix, and on the first final
   assembly build the communication pattern used by the mat-vec routines. */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break; /* no more incoming messages */

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* First final assembly: set up the scatter used by MatMult and friends */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
423 
424 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
425 {
426   PetscErrorCode ierr;
427   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
428 
429   PetscFunctionBegin;
430   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
431   PetscFunctionReturn(0);
432 }
433 
/* Zero the listed (global-index) rows that are locally owned; optionally fix
   the right-hand side b = diag*x on those rows and place diag on the diagonal. */
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr);
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr);
  }
  /* zero the rows with a zero diagonal first ... */
  ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (diag != 0.0) {
    Vec d;

    /* ... then place diag on the diagonal. NOTE(review): MatDiagonalSet with
       INSERT_VALUES overwrites the diagonal of EVERY row, not only the zeroed
       ones — confirm this matches the intended MatZeroRows semantics */
    ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
    ierr = VecSet(d,diag);CHKERRQ(ierr);
    ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecDestroy(&d);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
466 
467 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
468 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
469 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
470 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
471 
/* y = A*x: gather the distributed input x into the full-length work vector
   lvec via the communication SF, then apply the local sequential mat-vec
   (each rank stores all columns of its rows, so it needs all of x). */
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  /* Memtype-aware access lets the SF communicate directly from/to device memory */
  ierr = VecGetArrayReadAndMemType(xx,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(xx,&ax);CHKERRQ(ierr);
  /* Local product with the gathered input vector */
  ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
490 
/* z = A*x + y: gather x into the work vector lvec via the SF, then apply the
   local sequential multiply-add. */
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  /* Memtype-aware access lets the SF communicate directly from/to device memory */
  ierr = VecGetArrayReadAndMemType(xx,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(xx,&ax);CHKERRQ(ierr);
  /* Local multiply-add with the gathered input vector */
  ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
509 
/* y = A^T*x: apply the local transpose product into the work vector lvec,
   then sum the partial results into the distributed y via an SF reduction. */
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  /* yy accumulates contributions from all ranks, so it must start at zero */
  ierr = VecSet(yy,0.0);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  /* Memtype-aware access lets the SF communicate directly from/to device memory */
  ierr = VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(yy,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(yy,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
529 
/* z = A^T*x + y: seed z with y, apply the local transpose product into the
   work vector lvec, then sum the partial results into z via an SF reduction. */
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  /* Start from zz = yy; the reduction then adds A^T*x on top */
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  /* Memtype-aware access lets the SF communicate directly from/to device memory */
  ierr = VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(zz,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(zz,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
549 
550 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
551 {
552   Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
553   PetscErrorCode    ierr;
554   PetscInt          lda,len,i,n,m = A->rmap->n,radd;
555   PetscScalar       *x,zero = 0.0;
556   const PetscScalar *av;
557 
558   PetscFunctionBegin;
559   ierr = VecSet(v,zero);CHKERRQ(ierr);
560   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
561   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
562   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
563   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
564   radd = A->rmap->rstart*m;
565   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
566   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
567   for (i=0; i<len; i++) {
568     x[i] = av[radd + i*lda + i];
569   }
570   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
571   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
572   PetscFunctionReturn(0);
573 }
574 
/* Destroy the parallel dense matrix: release the stash, the inner sequential
   matrix, communication objects and cached views, then detach every composed
   method so the type can be cleanly changed or rebuilt. */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  /* Outstanding views of the data would become dangling after this point */
  if (mdn->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (mdn->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);

  /* Remove all composed method implementations registered for this type */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
#if defined (PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
634 
635 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
636 
637 #include <petscdraw.h>
638 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
639 {
640   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
641   PetscErrorCode    ierr;
642   PetscMPIInt       rank;
643   PetscViewerType   vtype;
644   PetscBool         iascii,isdraw;
645   PetscViewer       sviewer;
646   PetscViewerFormat format;
647 
648   PetscFunctionBegin;
649   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
650   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
651   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
652   if (iascii) {
653     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
654     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
655     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
656       MatInfo info;
657       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
658       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
659       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
660       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
661       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
662       ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
663       PetscFunctionReturn(0);
664     } else if (format == PETSC_VIEWER_ASCII_INFO) {
665       PetscFunctionReturn(0);
666     }
667   } else if (isdraw) {
668     PetscDraw draw;
669     PetscBool isnull;
670 
671     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
672     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
673     if (isnull) PetscFunctionReturn(0);
674   }
675 
676   {
677     /* assemble the entire matrix onto first processor. */
678     Mat         A;
679     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
680     PetscInt    *cols;
681     PetscScalar *vals;
682 
683     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
684     if (!rank) {
685       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
686     } else {
687       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
688     }
689     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
690     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
691     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
692     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
693 
694     /* Copy the matrix ... This isn't the most efficient means,
695        but it's quick for now */
696     A->insertmode = INSERT_VALUES;
697 
698     row = mat->rmap->rstart;
699     m   = mdn->A->rmap->n;
700     for (i=0; i<m; i++) {
701       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
702       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
703       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
704       row++;
705     }
706 
707     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
708     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
709     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
710     if (!rank) {
711       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
712       ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
713     }
714     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
715     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
716     ierr = MatDestroy(&A);CHKERRQ(ierr);
717   }
718   PetscFunctionReturn(0);
719 }
720 
721 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
722 {
723   PetscErrorCode ierr;
724   PetscBool      iascii,isbinary,isdraw,issocket;
725 
726   PetscFunctionBegin;
727   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
728   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
729   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
730   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
731 
732   if (iascii || issocket || isdraw) {
733     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
734   } else if (isbinary) {
735     ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
736   }
737   PetscFunctionReturn(0);
738 }
739 
740 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
741 {
742   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
743   Mat            mdn  = mat->A;
744   PetscErrorCode ierr;
745   PetscLogDouble isend[5],irecv[5];
746 
747   PetscFunctionBegin;
748   info->block_size = 1.0;
749 
750   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
751 
752   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
753   isend[3] = info->memory;  isend[4] = info->mallocs;
754   if (flag == MAT_LOCAL) {
755     info->nz_used      = isend[0];
756     info->nz_allocated = isend[1];
757     info->nz_unneeded  = isend[2];
758     info->memory       = isend[3];
759     info->mallocs      = isend[4];
760   } else if (flag == MAT_GLOBAL_MAX) {
761     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
762 
763     info->nz_used      = irecv[0];
764     info->nz_allocated = irecv[1];
765     info->nz_unneeded  = irecv[2];
766     info->memory       = irecv[3];
767     info->mallocs      = irecv[4];
768   } else if (flag == MAT_GLOBAL_SUM) {
769     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
770 
771     info->nz_used      = irecv[0];
772     info->nz_allocated = irecv[1];
773     info->nz_unneeded  = irecv[2];
774     info->memory       = irecv[3];
775     info->mallocs      = irecv[4];
776   }
777   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
778   info->fill_ratio_needed = 0;
779   info->factor_mallocs    = 0;
780   PetscFunctionReturn(0);
781 }
782 
783 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
784 {
785   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
786   PetscErrorCode ierr;
787 
788   PetscFunctionBegin;
789   switch (op) {
790   case MAT_NEW_NONZERO_LOCATIONS:
791   case MAT_NEW_NONZERO_LOCATION_ERR:
792   case MAT_NEW_NONZERO_ALLOCATION_ERR:
793     MatCheckPreallocated(A,1);
794     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
795     break;
796   case MAT_ROW_ORIENTED:
797     MatCheckPreallocated(A,1);
798     a->roworiented = flg;
799     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
800     break;
801   case MAT_FORCE_DIAGONAL_ENTRIES:
802   case MAT_KEEP_NONZERO_PATTERN:
803   case MAT_USE_HASH_TABLE:
804   case MAT_SORTED_FULL:
805     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
806     break;
807   case MAT_IGNORE_OFF_PROC_ENTRIES:
808     a->donotstash = flg;
809     break;
810   case MAT_SYMMETRIC:
811   case MAT_STRUCTURALLY_SYMMETRIC:
812   case MAT_HERMITIAN:
813   case MAT_SYMMETRY_ETERNAL:
814   case MAT_IGNORE_LOWER_TRIANGULAR:
815   case MAT_IGNORE_ZERO_ENTRIES:
816     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
817     break;
818   default:
819     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
820   }
821   PetscFunctionReturn(0);
822 }
823 
824 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
825 {
826   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
827   const PetscScalar *l;
828   PetscScalar       x,*v,*vv,*r;
829   PetscErrorCode    ierr;
830   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;
831 
832   PetscFunctionBegin;
833   ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
834   ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
835   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
836   if (ll) {
837     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
838     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2);
839     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
840     for (i=0; i<m; i++) {
841       x = l[i];
842       v = vv + i;
843       for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
844     }
845     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
846     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
847   }
848   if (rr) {
849     const PetscScalar *ar;
850 
851     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
852     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
853     ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
854     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
855     ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE);CHKERRQ(ierr);
856     ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE);CHKERRQ(ierr);
857     ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr);
858     for (i=0; i<n; i++) {
859       x = r[i];
860       v = vv + i*lda;
861       for (j=0; j<m; j++) (*v++) *= x;
862     }
863     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
864     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
865   }
866   ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr);
867   PetscFunctionReturn(0);
868 }
869 
/*
   MatNorm_MPIDense - Computes a norm of a parallel dense matrix.

   Supports NORM_FROBENIUS, NORM_1 (max column sum), and NORM_INFINITY
   (max row sum); the 2-norm is not supported.  On a single process the
   computation is delegated entirely to the local sequential block.

   NOTE(review): the Frobenius and 1-norm loops walk the local array as if it
   were contiguous (leading dimension equal to the local row count); assumes
   the local block's lda equals rmap->n -- confirm for matrices with a
   user-set LDA.
*/
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,j;
  PetscMPIInt       size;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr);
  v    = av;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  if (size == 1) {
    /* uniprocessor: the local block is the whole matrix */
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of |a_ij|^2 over the local entries, then reduce and take the root */
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      /* per-column absolute sums of the local rows, summed across processes,
         then the maximum over the columns; each process stores all N columns */
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = av;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not split across processes, so the local row norms are exact */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
918 
/*
   MatTranspose_MPIDense - Forms the transpose of a parallel dense matrix.

   For MAT_INITIAL_MATRIX and MAT_INPLACE_MATRIX a fresh matrix with the
   transposed layout (cmap->n local rows, rmap->n local columns) is created;
   for MAT_REUSE_MATRIX the caller-supplied *matout is filled.  Each local
   column j of A becomes (part of) global row j of the result, inserted via
   MatSetValues and communicated during assembly.  For MAT_INPLACE_MATRIX the
   result is merged back into A's header, destroying the temporary B.
*/
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i,lda;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else B = *matout;

  m    = a->A->rmap->n; n = a->A->cmap->n;
  ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  /* rwork[i] is the global row index of local row i; these become the
     column indices of the transposed entries */
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    /* column j of the local block (contiguous, length m) is row j of B */
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += lda;
  }
  ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: replace A's contents with B's and destroy B's shell */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
956 
957 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
958 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
959 
960 PetscErrorCode MatSetUp_MPIDense(Mat A)
961 {
962   PetscErrorCode ierr;
963 
964   PetscFunctionBegin;
965   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
966   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
967   if (!A->preallocated) {
968     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
969   }
970   PetscFunctionReturn(0);
971 }
972 
973 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
974 {
975   PetscErrorCode ierr;
976   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
977 
978   PetscFunctionBegin;
979   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
980   PetscFunctionReturn(0);
981 }
982 
983 PetscErrorCode MatConjugate_MPIDense(Mat mat)
984 {
985   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
986   PetscErrorCode ierr;
987 
988   PetscFunctionBegin;
989   ierr = MatConjugate(a->A);CHKERRQ(ierr);
990   PetscFunctionReturn(0);
991 }
992 
993 PetscErrorCode MatRealPart_MPIDense(Mat A)
994 {
995   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
996   PetscErrorCode ierr;
997 
998   PetscFunctionBegin;
999   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1004 {
1005   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1006   PetscErrorCode ierr;
1007 
1008   PetscFunctionBegin;
1009   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1010   PetscFunctionReturn(0);
1011 }
1012 
1013 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
1014 {
1015   PetscErrorCode ierr;
1016   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1017 
1018   PetscFunctionBegin;
1019   if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
1020   if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
1021   ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
1022   PetscFunctionReturn(0);
1023 }
1024 
1025 PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1026 
1027 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1028 {
1029   PetscErrorCode ierr;
1030   PetscInt       i,n;
1031   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1032   PetscReal      *work;
1033 
1034   PetscFunctionBegin;
1035   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1036   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1037   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1038   if (type == NORM_2) {
1039     for (i=0; i<n; i++) work[i] *= work[i];
1040   }
1041   if (type == NORM_INFINITY) {
1042     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1043   } else {
1044     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1045   }
1046   ierr = PetscFree(work);CHKERRQ(ierr);
1047   if (type == NORM_2) {
1048     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1049   }
1050   PetscFunctionReturn(0);
1051 }
1052 
1053 #if defined(PETSC_HAVE_CUDA)
/*
   MatDenseGetColumnVec_MPIDenseCUDA - Returns a CUDA vector that aliases
   column col of the device storage (read/write access).

   Only one column view may be outstanding at a time; a->vecinuse stores
   col+1 (so 0 means "free") and is checked by the matching restore call.
   The wrapping vector a->cvec is created lazily with a NULL array and the
   column pointer is placed into it on each call.
*/
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  /* column col starts at offset col*lda in the column-major device array */
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1074 
1075 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1076 {
1077   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1078   PetscErrorCode ierr;
1079 
1080   PetscFunctionBegin;
1081   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1082   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1083   a->vecinuse = 0;
1084   ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1085   ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
1086   *v   = NULL;
1087   PetscFunctionReturn(0);
1088 }
1089 
/*
   MatDenseGetColumnVecRead_MPIDenseCUDA - Read-only variant of
   MatDenseGetColumnVec_MPIDenseCUDA: returns a CUDA vector aliasing column
   col of the device storage, with a read lock pushed on the returned vector
   so write access through it is refused until the matching restore.
*/
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the wrapper vector with no backing array */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1111 
/*
   MatDenseRestoreColumnVecRead_MPIDenseCUDA - Returns the read-only column
   view: clears the in-use flag, restores the device array, pops the read
   lock, and detaches the placed pointer from the wrapper vector.
*/
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1127 
/*
   MatDenseGetColumnVecWrite_MPIDenseCUDA - Write-only variant of
   MatDenseGetColumnVec_MPIDenseCUDA: returns a CUDA vector aliasing column
   col of the device storage, obtained through the write-access array getter.
*/
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the wrapper vector with no backing array */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1148 
/*
   MatDenseRestoreColumnVecWrite_MPIDenseCUDA - Returns the write-only column
   view: clears the in-use flag, restores the device array through the
   write-access path, and detaches the placed pointer from the wrapper vector.
*/
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1163 
1164 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1165 {
1166   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1167   PetscErrorCode ierr;
1168 
1169   PetscFunctionBegin;
1170   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1171   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1172   ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1177 {
1178   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1179   PetscErrorCode ierr;
1180 
1181   PetscFunctionBegin;
1182   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1183   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1184   ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1189 {
1190   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1191   PetscErrorCode ierr;
1192 
1193   PetscFunctionBegin;
1194   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1195   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1196   ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr);
1197   PetscFunctionReturn(0);
1198 }
1199 
1200 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1201 {
1202   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1203   PetscErrorCode ierr;
1204 
1205   PetscFunctionBegin;
1206   ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
1207   PetscFunctionReturn(0);
1208 }
1209 
1210 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1211 {
1212   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1213   PetscErrorCode ierr;
1214 
1215   PetscFunctionBegin;
1216   ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
1217   PetscFunctionReturn(0);
1218 }
1219 
1220 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1221 {
1222   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1223   PetscErrorCode ierr;
1224 
1225   PetscFunctionBegin;
1226   ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
1227   PetscFunctionReturn(0);
1228 }
1229 
1230 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1231 {
1232   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1233   PetscErrorCode ierr;
1234 
1235   PetscFunctionBegin;
1236   ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1241 {
1242   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1243   PetscErrorCode ierr;
1244 
1245   PetscFunctionBegin;
1246   ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
1247   PetscFunctionReturn(0);
1248 }
1249 
1250 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1251 {
1252   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1253   PetscErrorCode ierr;
1254 
1255   PetscFunctionBegin;
1256   ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1261 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1262 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
1263 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1264 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1265 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
1266 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);
1267 
/*
   MatBindToCPU_MPIDenseCUDA - Pins the matrix to the CPU (bind=PETSC_TRUE) or
   re-enables GPU execution (bind=PETSC_FALSE).

   Propagates the binding to the local block (and the cached submatrix cmat),
   and swaps the composed column-vector methods between the CPU and CUDA
   implementations.  When unbinding, cached helper objects of the wrong
   (non-CUDA) type are destroyed so they will be recreated lazily with the
   correct type.  Refused while a column vector or submatrix view is
   outstanding, since those reference the current implementation.
*/
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (d->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (d->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* drop cached helpers that are not CUDA-typed so they get recreated lazily */
    ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
    }
    ierr = PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = MatDestroy(&d->cmat);CHKERRQ(ierr);
    }
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
  } else {
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  }
  if (d->cmat) {
    ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1310 
/*
   MatMPIDenseCUDASetPreallocation - Allocates (or adopts) the device storage
   of a MATMPIDENSECUDA matrix.

   Input Parameters:
.  A      - the matrix; silently ignored unless its type is MATMPIDENSECUDA
.  d_data - optional device array to use as storage; NULL to let PETSc allocate

   The local block is created on demand with all N columns of the locally
   owned rows, typed MATSEQDENSECUDA, and preallocated with d_data.
*/
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  if (!iscuda) PetscFunctionReturn(0);
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!d->A) {
    /* local block: rmap->n rows by the full global column count */
    ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
    ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
  }
  ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
  ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1333 #endif
1334 
1335 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1336 {
1337   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1338   PetscErrorCode ierr;
1339 
1340   PetscFunctionBegin;
1341   ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
1342   PetscFunctionReturn(0);
1343 }
1344 
/*
   MatMissingDiagonal_MPIDense - A dense matrix stores every entry, so the
   diagonal can never be missing; *missing is always PETSC_FALSE and the
   optional location output d is intentionally left untouched.
*/
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1351 
1352 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1353 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1354 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1355 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1356 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
1357 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);
1358 
1359 /* -------------------------------------------------------------------*/
1360 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1361                                         MatGetRow_MPIDense,
1362                                         MatRestoreRow_MPIDense,
1363                                         MatMult_MPIDense,
1364                                 /*  4*/ MatMultAdd_MPIDense,
1365                                         MatMultTranspose_MPIDense,
1366                                         MatMultTransposeAdd_MPIDense,
1367                                         NULL,
1368                                         NULL,
1369                                         NULL,
1370                                 /* 10*/ NULL,
1371                                         NULL,
1372                                         NULL,
1373                                         NULL,
1374                                         MatTranspose_MPIDense,
1375                                 /* 15*/ MatGetInfo_MPIDense,
1376                                         MatEqual_MPIDense,
1377                                         MatGetDiagonal_MPIDense,
1378                                         MatDiagonalScale_MPIDense,
1379                                         MatNorm_MPIDense,
1380                                 /* 20*/ MatAssemblyBegin_MPIDense,
1381                                         MatAssemblyEnd_MPIDense,
1382                                         MatSetOption_MPIDense,
1383                                         MatZeroEntries_MPIDense,
1384                                 /* 24*/ MatZeroRows_MPIDense,
1385                                         NULL,
1386                                         NULL,
1387                                         NULL,
1388                                         NULL,
1389                                 /* 29*/ MatSetUp_MPIDense,
1390                                         NULL,
1391                                         NULL,
1392                                         MatGetDiagonalBlock_MPIDense,
1393                                         NULL,
1394                                 /* 34*/ MatDuplicate_MPIDense,
1395                                         NULL,
1396                                         NULL,
1397                                         NULL,
1398                                         NULL,
1399                                 /* 39*/ MatAXPY_MPIDense,
1400                                         MatCreateSubMatrices_MPIDense,
1401                                         NULL,
1402                                         MatGetValues_MPIDense,
1403                                         MatCopy_MPIDense,
1404                                 /* 44*/ NULL,
1405                                         MatScale_MPIDense,
1406                                         MatShift_Basic,
1407                                         NULL,
1408                                         NULL,
1409                                 /* 49*/ MatSetRandom_MPIDense,
1410                                         NULL,
1411                                         NULL,
1412                                         NULL,
1413                                         NULL,
1414                                 /* 54*/ NULL,
1415                                         NULL,
1416                                         NULL,
1417                                         NULL,
1418                                         NULL,
1419                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1420                                         MatDestroy_MPIDense,
1421                                         MatView_MPIDense,
1422                                         NULL,
1423                                         NULL,
1424                                 /* 64*/ NULL,
1425                                         NULL,
1426                                         NULL,
1427                                         NULL,
1428                                         NULL,
1429                                 /* 69*/ NULL,
1430                                         NULL,
1431                                         NULL,
1432                                         NULL,
1433                                         NULL,
1434                                 /* 74*/ NULL,
1435                                         NULL,
1436                                         NULL,
1437                                         NULL,
1438                                         NULL,
1439                                 /* 79*/ NULL,
1440                                         NULL,
1441                                         NULL,
1442                                         NULL,
1443                                 /* 83*/ MatLoad_MPIDense,
1444                                         NULL,
1445                                         NULL,
1446                                         NULL,
1447                                         NULL,
1448                                         NULL,
1449                                 /* 89*/ NULL,
1450                                         NULL,
1451                                         NULL,
1452                                         NULL,
1453                                         NULL,
1454                                 /* 94*/ NULL,
1455                                         NULL,
1456                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1457                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1458                                         NULL,
1459                                 /* 99*/ MatProductSetFromOptions_MPIDense,
1460                                         NULL,
1461                                         NULL,
1462                                         MatConjugate_MPIDense,
1463                                         NULL,
1464                                 /*104*/ NULL,
1465                                         MatRealPart_MPIDense,
1466                                         MatImaginaryPart_MPIDense,
1467                                         NULL,
1468                                         NULL,
1469                                 /*109*/ NULL,
1470                                         NULL,
1471                                         NULL,
1472                                         MatGetColumnVector_MPIDense,
1473                                         MatMissingDiagonal_MPIDense,
1474                                 /*114*/ NULL,
1475                                         NULL,
1476                                         NULL,
1477                                         NULL,
1478                                         NULL,
1479                                 /*119*/ NULL,
1480                                         NULL,
1481                                         NULL,
1482                                         NULL,
1483                                         NULL,
1484                                 /*124*/ NULL,
1485                                         MatGetColumnNorms_MPIDense,
1486                                         NULL,
1487                                         NULL,
1488                                         NULL,
1489                                 /*129*/ NULL,
1490                                         NULL,
1491                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1492                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1493                                         NULL,
1494                                 /*134*/ NULL,
1495                                         NULL,
1496                                         NULL,
1497                                         NULL,
1498                                         NULL,
1499                                 /*139*/ NULL,
1500                                         NULL,
1501                                         NULL,
1502                                         NULL,
1503                                         NULL,
1504                                         MatCreateMPIMatConcatenateSeqMat_MPIDense,
1505                                 /*145*/ NULL,
1506                                         NULL,
1507                                         NULL
1508 };
1509 
/* Preallocates (or re-preallocates) the local storage of a MATMPIDENSE matrix.

   mat  - the MPI dense matrix
   data - optional user-provided array for the local block; NULL lets PETSc allocate

   The inner sequential matrix a->A holds all cmap->N columns of the locally owned
   rmap->n rows. Its type is chosen to match the outer matrix (SEQDENSECUDA when
   the outer type is MPIDENSECUDA, SEQDENSE otherwise). */
PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* storage cannot change while a submatrix view from MatDenseGetSubMatrix() is outstanding */
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  if (!a->A) {
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
    /* inner matrix stores every column of the locally owned rows */
    ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  }
  ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  mat->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1531 
/* Converts a MATMPIAIJ matrix to MATMPIDENSE.

   The local rows of A are first merged into a sequential AIJ matrix, converted
   to sequential dense, and the per-process dense pieces are concatenated into
   the parallel result. Supports MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX and
   MAT_INPLACE_MATRIX via the reuse argument. */
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            B,C;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatConvert_SeqAIJ_SeqDense(C,MATSEQDENSE,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
  ierr = MatDestroy(&C);CHKERRQ(ierr);
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat; /* reuse the previously created parallel matrix */
  } else C = NULL;
  ierr = MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  if (reuse == MAT_INPLACE_MATRIX) {
    /* swap A's internals with the freshly built dense matrix */
    ierr = MatHeaderReplace(A,&C);CHKERRQ(ierr);
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(0);
}
1551 
/* Converts a MATMPIDENSE matrix to MATMPIAIJ.

   The inner sequential dense block of A is converted to sequential AIJ and the
   per-process pieces are concatenated into the parallel result. Supports
   MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX and MAT_INPLACE_MATRIX via reuse. */
PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            B,C;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetLocalMatrix(A,&C);CHKERRQ(ierr);
  ierr = MatConvert_SeqDense_SeqAIJ(C,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat; /* reuse the previously created parallel matrix */
  } else C = NULL;
  ierr = MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  if (reuse == MAT_INPLACE_MATRIX) {
    /* swap A's internals with the freshly built AIJ matrix */
    ierr = MatHeaderReplace(A,&C);CHKERRQ(ierr);
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(0);
}
1570 
1571 #if defined(PETSC_HAVE_ELEMENTAL)
1572 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1573 {
1574   Mat            mat_elemental;
1575   PetscErrorCode ierr;
1576   PetscScalar    *v;
1577   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1578 
1579   PetscFunctionBegin;
1580   if (reuse == MAT_REUSE_MATRIX) {
1581     mat_elemental = *newmat;
1582     ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
1583   } else {
1584     ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
1585     ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1586     ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
1587     ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
1588     ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1589   }
1590 
1591   ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
1592   for (i=0; i<N; i++) cols[i] = i;
1593   for (i=0; i<m; i++) rows[i] = rstart + i;
1594 
1595   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1596   ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
1597   ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
1598   ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1599   ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1600   ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
1601   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1602 
1603   if (reuse == MAT_INPLACE_MATRIX) {
1604     ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
1605   } else {
1606     *newmat = mat_elemental;
1607   }
1608   PetscFunctionReturn(0);
1609 }
1610 #endif
1611 
1612 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1613 {
1614   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1615   PetscErrorCode ierr;
1616 
1617   PetscFunctionBegin;
1618   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1619   PetscFunctionReturn(0);
1620 }
1621 
1622 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1623 {
1624   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1625   PetscErrorCode ierr;
1626 
1627   PetscFunctionBegin;
1628   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1629   PetscFunctionReturn(0);
1630 }
1631 
1632 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1633 {
1634   PetscErrorCode ierr;
1635   Mat_MPIDense   *mat;
1636   PetscInt       m,nloc,N;
1637 
1638   PetscFunctionBegin;
1639   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
1640   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
1641   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1642     PetscInt sum;
1643 
1644     if (n == PETSC_DECIDE) {
1645       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1646     }
1647     /* Check sum(n) = N */
1648     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1649     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1650 
1651     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
1652     ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
1653   }
1654 
1655   /* numeric phase */
1656   mat = (Mat_MPIDense*)(*outmat)->data;
1657   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1658   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1659   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1660   PetscFunctionReturn(0);
1661 }
1662 
1663 #if defined(PETSC_HAVE_CUDA)
/* Converts a MATMPIDENSECUDA matrix back to plain (host) MATMPIDENSE.

   The CUDA-specific composed functions are removed, the default vector type is
   reset to VECSTANDARD, and the inner sequential matrix is converted in place
   to MATSEQDENSE. */
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* for MAT_INPLACE_MATRIX, *newmat is M itself and is converted in place */

  B    = *newmat;
  /* move data to the host and install CPU function pointers before retyping */
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
  /* strip all CUDA-only composed methods */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* convert the inner sequential matrix and rebuild the mat-vec scatter */
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}
1705 
/* Converts a host MATMPIDENSE matrix to MATMPIDENSECUDA.

   Installs the CUDA-specific composed functions, sets the default vector type
   to VECCUDA, and converts the inner sequential matrix in place to
   MATSEQDENSECUDA. */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* for MAT_INPLACE_MATRIX, *newmat is M itself and is converted in place */

  B    = *newmat;
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  /* install the CUDA-only composed methods */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* convert the inner sequential matrix and rebuild the mat-vec scatter */
    ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  /* install GPU function pointers */
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
1750 #endif
1751 
/* Exposes column col of the parallel dense matrix as a parallel Vec.

   The returned vector wraps (no copy) the storage of column col of the inner
   sequential matrix; return it with MatDenseRestoreColumnVec(). The work
   vector a->cvec is created lazily and reused; only one column vector or
   submatrix view may be outstanding at a time. */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* array is attached later with VecPlaceArray() */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* +1 so that column 0 still reads as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  /* columns are contiguous with leading dimension lda */
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1771 
1772 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1773 {
1774   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1775   PetscErrorCode ierr;
1776 
1777   PetscFunctionBegin;
1778   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1779   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1780   a->vecinuse = 0;
1781   ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1782   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1783   *v   = NULL;
1784   PetscFunctionReturn(0);
1785 }
1786 
/* Read-only variant of MatDenseGetColumnVec_MPIDense(): the returned vector is
   locked for reading and wraps (no copy) the storage of column col of the
   inner sequential matrix. Return it with MatDenseRestoreColumnVecRead(). */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* array is attached later with VecPlaceArray() */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* +1 so that column 0 still reads as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  /* forbid write access through the returned vector */
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1807 
1808 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1809 {
1810   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1811   PetscErrorCode ierr;
1812 
1813   PetscFunctionBegin;
1814   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1815   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1816   a->vecinuse = 0;
1817   ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1818   ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
1819   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1820   *v   = NULL;
1821   PetscFunctionReturn(0);
1822 }
1823 
/* Write-only variant of MatDenseGetColumnVec_MPIDense(): the returned vector
   wraps (no copy) the storage of column col of the inner sequential matrix,
   obtained with write-only array access. Return it with
   MatDenseRestoreColumnVecWrite(). */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* array is attached later with VecPlaceArray() */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* +1 so that column 0 still reads as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1843 
1844 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1845 {
1846   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1847   PetscErrorCode ierr;
1848 
1849   PetscFunctionBegin;
1850   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1851   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1852   a->vecinuse = 0;
1853   ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1854   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1855   *v   = NULL;
1856   PetscFunctionReturn(0);
1857 }
1858 
/* Exposes the contiguous column range [cbegin, cend) of A as a matrix that
   shares A's storage.

   The view a->cmat is created lazily and reused across calls; its column
   layout is rebuilt when the requested width changes. Return the view with
   MatDenseRestoreSubMatrix(); only one view or column vector may be
   outstanding at a time. */
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscBool      setup = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (a->vecinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cmat) {
    /* first call: build the view with A's row layout and the requested width */
    setup = PETSC_TRUE;
    ierr = MatCreate(comm,&a->cmat);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr);
    ierr = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  } else if (cend-cbegin != a->cmat->cmap->N) {
    /* cached view has a different width: rebuild only the column layout */
    setup = PETSC_TRUE;
    ierr = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  }
  c = (Mat_MPIDense*)a->cmat->data;
  if (c->A) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  /* the view's inner matrix borrows the storage of A's inner matrix */
  ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr);
  if (setup) { /* do we really need this? */
    ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr);
  }
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled = PETSC_TRUE;
  a->matinuse = cbegin + 1; /* +1 so that cbegin == 0 still reads as "in use" */
  *v = a->cmat;
  PetscFunctionReturn(0);
}
1898 
/* Returns the submatrix view obtained with MatDenseGetSubMatrix_MPIDense().

   Releases the borrowed inner sequential submatrix back to a->A; the cached
   view a->cmat itself is kept for reuse by the next Get call. */
PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
  if (!a->cmat) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
  if (*v != a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c    = (Mat_MPIDense*)a->cmat->data;
  ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1915 
1916 /*MC
1917    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1918 
1919    Options Database Keys:
1920 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1921 
1922   Level: beginner
1923 
1924 .seealso: MatCreateDense()
1925 
1926 M*/
/* Constructor for MATMPIDENSE: allocates the per-matrix data structure,
   installs the function table, and composes the type-specific methods.
   Storage is allocated later by MatMPIDenseSetPreallocation(). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  /* install the dense-specific implementations of the Mat operations */
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* dense array/column access methods */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr);
  /* type conversions */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",MatConvert_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  /* preallocation and matrix products */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#endif

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1992 
1993 /*MC
1994    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1995 
1996    Options Database Keys:
1997 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1998 
1999   Level: beginner
2000 
.seealso: MATMPIDENSE, MATSEQDENSECUDA, MATDENSECUDA
2002 
2003 M*/
2004 #if defined(PETSC_HAVE_CUDA)
/* Constructor for MATMPIDENSECUDA: builds a host MPIDENSE matrix and converts
   it in place to the CUDA type after checking CUDA is usable. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscCUDAInitializeCheck();CHKERRQ(ierr);
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2015 #endif
2016 
2017 /*MC
2018    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
2019 
2020    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
2021    and MATMPIDENSE otherwise.
2022 
2023    Options Database Keys:
2024 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
2025 
2026   Level: beginner
2027 
2028 
2029 .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
2030 M*/
2031 
2032 /*MC
2033    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
2034 
2035    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
2036    and MATMPIDENSECUDA otherwise.
2037 
2038    Options Database Keys:
2039 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
2040 
2041   Level: beginner
2042 
2043 .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
2044 M*/
2045 
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation; silently a no-op for other matrix types */
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2077 
/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation; errors if the type does not provide one */
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  /* the matrix values may have changed */
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the placed array is a host array, so the CPU copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
2111 
/*@
   MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameter:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode  MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation; errors if the type does not provide one */
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  /* the matrix again exposes the original (possibly different) values */
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2138 
/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   Unlike with MatDensePlaceArray(), the replacement is permanent: the
   memory passed in MUST be obtained with PetscMalloc() and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), VecReplaceArray()
@*/
PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation; errors if the type does not provide one */
  ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  /* the matrix values may have changed */
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the replacement array is a host array, so the CPU copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
2171 
#if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation; errors if the type does not provide one */
  ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  /* the matrix values may have changed */
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* the placed array is a device array, so the GPU copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2203 
/*@C
   MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray()

   Not Collective

   Input Parameter:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDenseCUDAPlaceArray()

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()

@*/
PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation; errors if the type does not provide one */
  ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  /* the matrix again exposes the original (possibly different) values */
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2230 
/*@C
   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   This permanently replaces the GPU array and frees the memory associated with the old GPU array.
   The memory passed in CANNOT be freed by the user. It will be freed
   when the matrix is destroyed. The array should respect the matrix leading dimension.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation; errors if the type does not provide one */
  ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  /* the matrix values may have changed */
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* the replacement array is a device array, so the GPU copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2262 
/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  a - the GPU array in column major order

   Notes:
   The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation; errors if the type does not provide one */
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  /* the caller is expected to modify the values through the returned pointer */
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2291 
/*@C
   MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().

   Not Collective

   Input Parameters:
+  A - the matrix
-  a - the GPU array in column major order

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation; errors if the type does not provide one */
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  /* the caller wrote through the device pointer, so the GPU copy is the valid one */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2318 
/*@C
   MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  a - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation; errors if the type does not provide one.
     No PetscObjectStateIncrease(): read-only access does not change the matrix */
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2346 
2347 /*@C
2348    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2349 
2350    Not Collective
2351 
2352    Input Parameters:
2353 +  A - the matrix
2354 -  array - the GPU array in column major order
2355 
2356    Notes:
2357    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2358 
2359    Level: developer
2360 
2361 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
2362 @*/
2363 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2364 {
2365   PetscErrorCode ierr;
2366 
2367   PetscFunctionBegin;
2368   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2369   PetscFunctionReturn(0);
2370 }
2371 
/*@C
   MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  a - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().

   Level: developer

.seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation; errors if the type does not provide one */
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  /* read/write access: assume the caller will modify the values */
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2400 
/*@C
   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().

   Not Collective

   Input Parameters:
+  A - the matrix
-  a - the GPU array in column major order

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation; errors if the type does not provide one */
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  /* the caller may have written through the device pointer, so the GPU copy is the valid one */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
#endif
2428 
/*@C
   MatCreateDense - Creates a matrix in dense format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.
   Note that, although local portions of the matrix are stored in column-major
   order, the matrix is partitioned across MPI ranks by row.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size > 1) {
    PetscBool havedata = (PetscBool)!!data;

    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    /* ranks with no local rows legitimately pass data=NULL, so agree collectively
       on whether any rank supplied a data array */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr);
    if (havedata) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    /* single-process communicator: the sequential type suffices */
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2488 
#if defined(PETSC_HAVE_CUDA)
/*@C
   MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of GPU matrix data.  Set data=NULL for PETSc
   to control matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The user MUST specify either the local or global matrix dimensions
   (possibly both), as for MatCreateDense().

   Level: intermediate

.seealso: MatCreate(), MatCreateDense()
@*/
PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  /* data must be collectively NULL or collectively non-NULL across the communicator */
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
    ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    /* single-process communicator: the sequential CUDA type suffices */
    ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
    ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif
2537 
2538 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
2539 {
2540   Mat            mat;
2541   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
2542   PetscErrorCode ierr;
2543 
2544   PetscFunctionBegin;
2545   *newmat = NULL;
2546   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
2547   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2548   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
2549   a       = (Mat_MPIDense*)mat->data;
2550 
2551   mat->factortype   = A->factortype;
2552   mat->assembled    = PETSC_TRUE;
2553   mat->preallocated = PETSC_TRUE;
2554 
2555   mat->insertmode = NOT_SET_VALUES;
2556   a->donotstash   = oldmat->donotstash;
2557 
2558   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
2559   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
2560 
2561   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2562   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2563   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
2564 
2565   *newmat = mat;
2566   PetscFunctionReturn(0);
2567 }
2568 
/* Load an MPI dense matrix from a viewer; supports binary and (if built) HDF5 viewers. */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}
2595 
2596 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
2597 {
2598   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2599   Mat            a,b;
2600   PetscBool      flg;
2601   PetscErrorCode ierr;
2602 
2603   PetscFunctionBegin;
2604   a    = matA->A;
2605   b    = matB->A;
2606   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2607   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2608   PetscFunctionReturn(0);
2609 }
2610 
2611 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2612 {
2613   PetscErrorCode        ierr;
2614   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2615 
2616   PetscFunctionBegin;
2617   ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
2618   ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
2619   ierr = PetscFree(atb);CHKERRQ(ierr);
2620   PetscFunctionReturn(0);
2621 }
2622 
2623 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2624 {
2625   PetscErrorCode        ierr;
2626   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2627 
2628   PetscFunctionBegin;
2629   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
2630   ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
2631   ierr = PetscFree(abt);CHKERRQ(ierr);
2632   PetscFunctionReturn(0);
2633 }
2634 
/* Numeric phase of C = A^T * B for MPI dense matrices: each rank computes its local
   contribution (full-sized), then MPI_Reduce_scatter sums all contributions and leaves
   each rank with its own row block of C. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts;
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  atb = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf = atb->sendbuf;

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);

  /* compute atbarray = aseq^T * bseq (reusing the cached intermediate on later calls) */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);

  /* arrange atbarray into sendbuf: for each destination rank, its row block of every
     column, so Reduce_scatter can hand each rank a contiguous chunk */
  ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);

  /* sum all atbarray to local values of C */
  ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRMPI(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2679 
2680 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2681 {
2682   PetscErrorCode        ierr;
2683   MPI_Comm              comm;
2684   PetscMPIInt           size;
2685   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
2686   Mat_TransMatMultDense *atb;
2687   PetscBool             cisdense;
2688   PetscInt              i;
2689   const PetscInt        *ranges;
2690 
2691   PetscFunctionBegin;
2692   MatCheckProduct(C,3);
2693   if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2694   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2695   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2696     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2697   }
2698 
2699   /* create matrix product C */
2700   ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr);
2701   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
2702   if (!cisdense) {
2703     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
2704   }
2705   ierr = MatSetUp(C);CHKERRQ(ierr);
2706 
2707   /* create data structure for reuse C */
2708   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
2709   ierr = PetscNew(&atb);CHKERRQ(ierr);
2710   cM   = C->rmap->N;
2711   ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);
2712   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
2713   for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
2714 
2715   C->product->data    = atb;
2716   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2717   PetscFunctionReturn(0);
2718 }
2719 
/* Symbolic phase of C = A * B^T for MPI dense matrices: sizes C and allocates the
   communication buffers for the chosen algorithm ("allgatherv" or "cyclic"). */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  PetscMPIInt           tag;
  PetscInt              alg;                  /* 0 = allgatherv, 1 = cyclic */
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A and B: the inner (column) dimension must match rank-locally */
  if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);

  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  alg  = flg ? 0 : 1;

  /* setup matrix product C */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  /* private tag so the cyclic algorithm's messages cannot collide with other traffic */
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" — two buffers sized for the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" — local pack buffer plus a buffer for all of B */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data    = abt;
  C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2772 
/* Numeric phase of C = A * B^T using the "cyclic" algorithm: the local rows of B are
   rotated around the ring of ranks with nonblocking sends/receives; while a transfer is
   in flight, the local GEMM against the block currently held is performed, overlapping
   communication with computation. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=NULL, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda = 0, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  = (Mat_MatTransMultDense*)C->product->data;
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
  /* leading dimensions of the local blocks, as BLAS integers */
  ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  if (blda == bn) {
    /* local B block is contiguous: send it directly without copying */
    sendbuf = (PetscScalar*)bv;
  } else {
    /* pack the local B block densely (strip the leading-dimension padding) */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  if (size > 1) {
    /* ring neighbors: send "down", receive "up" */
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank; /* the block currently in sendbuf originates from this rank */
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; /* alternate buffers each round */
      ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRMPI(ierr);
      ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRMPI(ierr);
    }

    /* local aseq * sendbuf^T, written into the column block of C owned by recvisfrom */
    ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2858 
2859 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2860 {
2861   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2862   Mat_MatTransMultDense *abt;
2863   PetscErrorCode        ierr;
2864   MPI_Comm              comm;
2865   PetscMPIInt           size;
2866   PetscScalar           *cv, *sendbuf, *recvbuf;
2867   const PetscScalar     *av,*bv;
2868   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2869   PetscScalar           _DOne=1.0,_DZero=0.0;
2870   PetscBLASInt          cm, cn, ck, alda, clda;
2871 
2872   PetscFunctionBegin;
2873   MatCheckProduct(C,3);
2874   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2875   abt  = (Mat_MatTransMultDense*)C->product->data;
2876   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2877   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
2878   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
2879   ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
2880   ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
2881   ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
2882   ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
2883   ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr);
2884   ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
2885   ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
2886   /* copy transpose of B into buf[0] */
2887   bn      = B->rmap->n;
2888   sendbuf = abt->buf[0];
2889   recvbuf = abt->buf[1];
2890   for (k = 0, j = 0; j < bn; j++) {
2891     for (i = 0; i < cK; i++, k++) {
2892       sendbuf[k] = bv[i * blda + j];
2893     }
2894   }
2895   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2896   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRMPI(ierr);
2897   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2898   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2899   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
2900   if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2901   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
2902   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2903   ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
2904   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2905   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2906   PetscFunctionReturn(0);
2907 }
2908 
2909 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2910 {
2911   Mat_MatTransMultDense *abt;
2912   PetscErrorCode        ierr;
2913 
2914   PetscFunctionBegin;
2915   MatCheckProduct(C,3);
2916   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2917   abt = (Mat_MatTransMultDense*)C->product->data;
2918   switch (abt->alg) {
2919   case 1:
2920     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2921     break;
2922   default:
2923     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2924     break;
2925   }
2926   PetscFunctionReturn(0);
2927 }
2928 
2929 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2930 {
2931   PetscErrorCode   ierr;
2932   Mat_MatMultDense *ab = (Mat_MatMultDense*)data;
2933 
2934   PetscFunctionBegin;
2935   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2936   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2937   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2938   ierr = PetscFree(ab);CHKERRQ(ierr);
2939   PetscFunctionReturn(0);
2940 }
2941 
2942 #if defined(PETSC_HAVE_ELEMENTAL)
/*
   Numeric phase of C = A*B for MPIDENSE matrices computed via Elemental:
   refreshes the Elemental mirrors of A and B (created by the symbolic phase
   and kept in C->product->data), multiplies in Elemental, and converts the
   result back into the MPIDENSE matrix C. All conversions use
   MAT_REUSE_MATRIX so no new matrices are allocated per call.
*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode   ierr;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
  ab   = (Mat_MatMultDense*)C->product->data;
  /* copy the current values of A and B into their Elemental mirrors */
  ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
  /* bring the Elemental product back into C */
  ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2958 
2959 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2960 {
2961   PetscErrorCode   ierr;
2962   Mat              Ae,Be,Ce;
2963   Mat_MatMultDense *ab;
2964 
2965   PetscFunctionBegin;
2966   MatCheckProduct(C,4);
2967   if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2968   /* check local size of A and B */
2969   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2970     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2971   }
2972 
2973   /* create elemental matrices Ae and Be */
2974   ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr);
2975   ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2976   ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr);
2977   ierr = MatSetUp(Ae);CHKERRQ(ierr);
2978   ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2979 
2980   ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr);
2981   ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
2982   ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
2983   ierr = MatSetUp(Be);CHKERRQ(ierr);
2984   ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2985 
2986   /* compute symbolic Ce = Ae*Be */
2987   ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr);
2988   ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr);
2989 
2990   /* setup C */
2991   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2992   ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr);
2993   ierr = MatSetUp(C);CHKERRQ(ierr);
2994 
2995   /* create data structure for reuse Cdense */
2996   ierr = PetscNew(&ab);CHKERRQ(ierr);
2997   ab->Ae = Ae;
2998   ab->Be = Be;
2999   ab->Ce = Ce;
3000 
3001   C->product->data    = ab;
3002   C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
3003   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
3004   PetscFunctionReturn(0);
3005 }
3006 #endif
3007 /* ----------------------------------------------- */
3008 #if defined(PETSC_HAVE_ELEMENTAL)
3009 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
3010 {
3011   PetscFunctionBegin;
3012   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
3013   C->ops->productsymbolic = MatProductSymbolic_AB;
3014   PetscFunctionReturn(0);
3015 }
3016 #endif
3017 
3018 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
3019 {
3020   Mat_Product *product = C->product;
3021   Mat         A = product->A,B=product->B;
3022 
3023   PetscFunctionBegin;
3024   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
3025     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
3026   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
3027   C->ops->productsymbolic = MatProductSymbolic_AtB;
3028   PetscFunctionReturn(0);
3029 }
3030 
/*
   Configure the C = A*B^T product: choose between the "allgatherv" (default)
   and "cyclic" algorithms, honoring a runtime option, then install the
   symbolic callbacks. The option name depends on whether the product was
   requested through the legacy MatMatTransposeMult API (api_user) or the
   MatProduct interface.
*/
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;
  const char     *algTypes[2] = {"allgatherv","cyclic"};
  PetscInt       alg,nalg = 2;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm: index 0 = "allgatherv"; only overwrite product->alg
     if it is still the placeholder "default" */
  alg = 0; /* default is allgatherv */
  ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  /* Get runtime option; the option name differs between the two entry points */
  if (product->api_user) {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  } else {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  }
  /* flg is true only if the user supplied the option; then record the choice */
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}
3065 
3066 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
3067 {
3068   PetscErrorCode ierr;
3069   Mat_Product    *product = C->product;
3070 
3071   PetscFunctionBegin;
3072   switch (product->type) {
3073 #if defined(PETSC_HAVE_ELEMENTAL)
3074   case MATPRODUCT_AB:
3075     ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
3076     break;
3077 #endif
3078   case MATPRODUCT_AtB:
3079     ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
3080     break;
3081   case MATPRODUCT_ABt:
3082     ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
3083     break;
3084   default:
3085     break;
3086   }
3087   PetscFunctionReturn(0);
3088 }
3089