/*
   Basic functions for parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
   MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
   matrix that stores the locally owned portion of the operator. For a sequential matrix it
   returns the matrix itself.

   Not collective

   Input Parameter:
.  A - the sequential or parallel (MPI) dense matrix

   Output Parameter:
.  B - the inner matrix

   Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}
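
/*
   Usage sketch (illustrative only, not part of the library): obtain the
   process-local MATSEQDENSE block of a parallel dense matrix and work with
   its column-major array directly. The sizes passed to MatCreateDense() are
   placeholders chosen for the example.

     Mat         A,lA;
     PetscScalar *a;

     ierr = MatCreateDense(PETSC_COMM_WORLD,5,PETSC_DECIDE,PETSC_DETERMINE,10,NULL,&A);CHKERRQ(ierr);
     ierr = MatDenseGetLocalMatrix(A,&lA);CHKERRQ(ierr);
     ierr = MatDenseGetArray(lA,&a);CHKERRQ(ierr);
     ... operate on the local array a (column-major, leading dimension from MatDenseGetLDA()) ...
     ierr = MatDenseRestoreArray(lA,&a);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/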

PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows supported");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;
  MPI_Comm       comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. Currently supports only submatrices with the same row
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
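
/*
   Usage sketch (illustrative only): extract a submatrix with MatCreateSubMatrix()
   while keeping the row partitioning of the original matrix, which is the only
   case the routine above supports. Each process selects its locally owned rows;
   ncols_local and first_col are placeholders.

     IS       isrow,iscol;
     Mat      S;
     PetscInt rstart,rend;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ncols_local,first_col,1,&iscol);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
*/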

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processes are in either INSERT_VALUES or ADD_VALUES mode */
  ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this process had no stashed values */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
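
/*
   Usage sketch (illustrative only): the standard assembly idiom that drives
   the stash machinery above. Entries destined for rows owned by another
   process are cached in mat->stash by MatSetValues() and communicated during
   MatAssemblyBegin()/MatAssemblyEnd(); grow, gcol and val are placeholders.

     ierr = MatSetValues(mat,1,&grow,1,&gcol,&val,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/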

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* The code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *sizes,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /* first count number of contributors to each processor */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m   = ll->lda, i;

    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
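
/*
   Usage sketch (illustrative only): zero two global rows, put 1.0 on their
   diagonal entries, and fix the right-hand side b so that the solution
   retains the values of x in those rows; the row indices are placeholders.

     PetscInt rows[] = {0,3};

     ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);
*/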

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);

#include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto the first processor */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix row by row. This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
  const PetscScalar *l,*r;
  PetscScalar       x,*v;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v += m;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector non-conforming local size, %D != %D", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v  = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode  MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
{
  PetscErrorCode    ierr;
  PetscScalar       *x;
  const PetscScalar *a;
  PetscInt          lda;

  PetscFunctionBegin;
  if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(A,&a);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = PetscArraycpy(x,a+col*lda,A->rmap->n);CHKERRQ(ierr);
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);

PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;
  PetscScalar    *a;
  PetscInt       m,n,i;

  PetscFunctionBegin;
  ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
  ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
  for (i=0; i<m*n; i++) {
    ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);

static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
#if defined(PETSC_HAVE_ELEMENTAL)
                                /* 89*/ 0,
                                        0,
#else
                                /* 89*/ 0,
                                        0,
#endif
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        0,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        0,
                                /* 99*/ MatProductSetFromOptions_MPIDense,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        0,
                                        0,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                /*145*/ 0,
                                        0,
                                        0
};

PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (mat->preallocated) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix has already been preallocated");
  mat->preallocated = PETSC_TRUE;
  /* Note: when data is specified, this assumes the user correctly
     allocated the local dense storage space. We should add error checking. */

  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* The PETSc-Elemental interface uses AXPY for setting off-process entries, so only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif
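
/*
   Usage sketch (illustrative only, requires a PETSc build configured with
   Elemental): convert a parallel dense matrix to the MATELEMENTAL format.

     Mat E;

     ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX,&E);CHKERRQ(ierr);
*/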
1271 
1272 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1273 {
1274   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1275   PetscErrorCode ierr;
1276 
1277   PetscFunctionBegin;
1278   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1279   PetscFunctionReturn(0);
1280 }
1281 
1282 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1283 {
1284   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1285   PetscErrorCode ierr;
1286 
1287   PetscFunctionBegin;
1288   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1289   PetscFunctionReturn(0);
1290 }
1291 
1292 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1293 {
1294   PetscErrorCode ierr;
1295   Mat_MPIDense   *mat;
1296   PetscInt       m,nloc,N;
1297 
1298   PetscFunctionBegin;
1299   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
1300   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
1301   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1302     PetscInt sum;
1303 
1304     if (n == PETSC_DECIDE) {
1305       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1306     }
1307     /* Check sum(n) = N */
1308     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1309     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1310 
1311     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
1312   }
1313 
1314   /* numeric phase */
1315   mat = (Mat_MPIDense*)(*outmat)->data;
1316   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1317   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1318   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1323 {
1324   Mat_MPIDense   *a;
1325   PetscErrorCode ierr;
1326 
1327   PetscFunctionBegin;
1328   ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
1329   mat->data = (void*)a;
1330   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1331 
1332   mat->insertmode = NOT_SET_VALUES;
1333   ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
1334   ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);
1335 
1336   /* build cache for off array entries formed */
1337   a->donotstash = PETSC_FALSE;
1338 
1339   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1340 
1341   /* stuff used for matrix vector multiply */
1342   a->lvec        = 0;
1343   a->Mvctx       = 0;
1344   a->roworiented = PETSC_TRUE;
1345 
1346   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
1347   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1348   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1349   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
1350   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
1351   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
1352   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
1353 #if defined(PETSC_HAVE_ELEMENTAL)
1354   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
1355 #endif
1356   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1357   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
1358   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
1359   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1360   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1361   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",MatMatMultSymbolic_Nest_Dense);CHKERRQ(ierr);
1362   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",MatMatMultNumeric_Nest_Dense);CHKERRQ(ierr);
1363 
1364   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1365   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1366   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
1367   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
1368   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1369   PetscFunctionReturn(0);
1370 }
1371 
1372 /*MC
1373    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1374 
1375    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1376    and MATMPIDENSE otherwise.
1377 
1378    Options Database Keys:
1379 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1380 
1381   Level: beginner
1382 
1383 
1384 .seealso: MATSEQDENSE,MATMPIDENSE
1385 M*/
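
/*
   A minimal sketch of constructing a matrix of this type; the global sizes M and N are
   assumed to be defined by the caller:

     Mat A;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
     ierr = MatSetUp(A);CHKERRQ(ierr);

   Calling MatSetFromOptions() instead of MatSetType() allows the type to be selected at
   runtime with -mat_type dense.
*/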
1386 
1387 /*@C
1388    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1389 
1390    Collective on Mat
1391 
1392    Input Parameters:
1393 .  B - the matrix
1394 -  data - optional location of matrix data.  Set data=NULL for PETSc
1395    to control all matrix memory allocation.
1396 
1397    Notes:
1398    The dense format is fully compatible with standard Fortran 77
1399    storage by columns.
1400 
1401    The data input variable is intended primarily for Fortran programmers
1402    who wish to allocate their own matrix memory space.  Most users should
1403    set data=NULL.
1404 
1405    Level: intermediate
1406 
1407 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1408 @*/
1409 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1410 {
1411   PetscErrorCode ierr;
1412 
1413   PetscFunctionBegin;
1414   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
1415   PetscFunctionReturn(0);
1416 }
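
/*
   A minimal sketch of the intended calling sequence; the local sizes m and n are
   assumptions supplied by the caller. Most users pass data=NULL so that PETSc
   allocates the storage:

     Mat B;

     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,m,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
*/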
1417 
1418 /*@
1419    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
1420    array provided by the user. This is useful to avoid copying an array
1421    into a matrix.
1422 
1423    Not Collective
1424 
1425    Input Parameters:
1426 +  mat - the matrix
1427 -  array - the array in column major order
1428 
1429    Notes:
1430    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1431    freed when the matrix is destroyed.
1432 
1433    Level: developer
1434 
1435 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1436 
1437 @*/
1438 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar array[])
1439 {
1440   PetscErrorCode ierr;
1441   PetscFunctionBegin;
1442   ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
1443   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1444   PetscFunctionReturn(0);
1445 }
1446 
1447 /*@
1448    MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()
1449 
1450    Not Collective
1451 
1452    Input Parameters:
1453 .  mat - the matrix
1454 
1455    Notes:
1456    You can only call this after a call to MatDensePlaceArray()
1457 
1458    Level: developer
1459 
1460 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1461 
1462 @*/
1463 PetscErrorCode  MatDenseResetArray(Mat mat)
1464 {
1465   PetscErrorCode ierr;
1466   PetscFunctionBegin;
1467   ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
1468   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1469   PetscFunctionReturn(0);
1470 }
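
/*
   A minimal sketch of the place/reset cycle documented above; the matrix A, its number of
   local rows m, and the global column count N are assumptions. The user-provided array
   must hold the locally stored block (all N global columns of the m locally owned rows)
   in column-major order, and is freed by the user, never by PETSc:

     PetscScalar *myarray;

     ierr = PetscMalloc1(m*N,&myarray);CHKERRQ(ierr);
     ierr = MatDensePlaceArray(A,myarray);CHKERRQ(ierr);
     ierr = MatDenseResetArray(A);CHKERRQ(ierr);
     ierr = PetscFree(myarray);CHKERRQ(ierr);

   Between the place and reset calls the matrix operates directly on myarray, so no copy
   is made.
*/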
1471 
1472 /*@C
1473    MatCreateDense - Creates a parallel matrix in dense format.
1474 
1475    Collective
1476 
1477    Input Parameters:
1478 +  comm - MPI communicator
1479 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1480 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1481 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1482 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1483 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1484    to control all matrix memory allocation.
1485 
1486    Output Parameter:
1487 .  A - the matrix
1488 
1489    Notes:
1490    The dense format is fully compatible with standard Fortran 77
1491    storage by columns.
1492 
1493    The data input variable is intended primarily for Fortran programmers
1494    who wish to allocate their own matrix memory space.  Most users should
1495    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1496 
1497    The user MUST specify either the local or global matrix dimensions
1498    (possibly both).
1499 
1500    Level: intermediate
1501 
1502 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1503 @*/
1504 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1505 {
1506   PetscErrorCode ierr;
1507   PetscMPIInt    size;
1508 
1509   PetscFunctionBegin;
1510   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1511   PetscValidLogicalCollectiveBool(*A,!!data,6);
1512   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1513   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1514   if (size > 1) {
1515     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1516     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1517     if (data) {  /* user provided data array, so no need to assemble */
1518       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
1519       (*A)->assembled = PETSC_TRUE;
1520     }
1521   } else {
1522     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1523     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1524   }
1525   PetscFunctionReturn(0);
1526 }
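
/*
   A minimal sketch of the calling sequence for the routine above; the global sizes M and N
   are assumptions and the local sizes are left for PETSc to choose:

     Mat A;

     ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);CHKERRQ(ierr);
     ierr = MatSetValue(A,0,0,1.0,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Passing data=NULL lets PETSc allocate the storage; with one process this yields a
   MATSEQDENSE matrix and otherwise a MATMPIDENSE one, as the branch above shows.
*/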
1527 
1528 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1529 {
1530   Mat            mat;
1531   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1532   PetscErrorCode ierr;
1533 
1534   PetscFunctionBegin;
1535   *newmat = 0;
1536   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1537   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1538   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1539   a       = (Mat_MPIDense*)mat->data;
1540 
1541   mat->factortype   = A->factortype;
1542   mat->assembled    = PETSC_TRUE;
1543   mat->preallocated = PETSC_TRUE;
1544 
1545   a->size         = oldmat->size;
1546   a->rank         = oldmat->rank;
1547   mat->insertmode = NOT_SET_VALUES;
1548   a->nvec         = oldmat->nvec;
1549   a->donotstash   = oldmat->donotstash;
1550 
1551   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1552   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1553 
1554   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1555   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1556   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1557 
1558   *newmat = mat;
1559   PetscFunctionReturn(0);
1560 }
1561 
1562 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
1563 {
1564   PetscErrorCode ierr;
1565   PetscBool      isbinary;
1566 #if defined(PETSC_HAVE_HDF5)
1567   PetscBool      ishdf5;
1568 #endif
1569 
1570   PetscFunctionBegin;
1571   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
1572   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
1573   /* force binary viewer to load .info file if it has not yet done so */
1574   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1575   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1576 #if defined(PETSC_HAVE_HDF5)
1577   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
1578 #endif
1579   if (isbinary) {
1580     ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
1581 #if defined(PETSC_HAVE_HDF5)
1582   } else if (ishdf5) {
1583     ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
1584 #endif
1585   } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
1586   PetscFunctionReturn(0);
1587 }
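
/*
   A minimal sketch of reaching the binary branch above through the public interface; the
   file name "matrix.dat" is an illustrative assumption:

     Mat         A;
     PetscViewer viewer;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/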
1588 
1589 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1590 {
1591   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1592   Mat            a,b;
1593   PetscBool      flg;
1594   PetscErrorCode ierr;
1595 
1596   PetscFunctionBegin;
1597   a    = matA->A;
1598   b    = matB->A;
1599   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1600   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1601   PetscFunctionReturn(0);
1602 }
1603 
1604 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1605 {
1606   PetscErrorCode        ierr;
1607   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1608   Mat_TransMatMultDense *atb = a->atbdense;
1609 
1610   PetscFunctionBegin;
1611   ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
1612   ierr = (atb->destroy)(A);CHKERRQ(ierr);
1613   ierr = PetscFree(atb);CHKERRQ(ierr);
1614   PetscFunctionReturn(0);
1615 }
1616 
1617 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
1618 {
1619   PetscErrorCode        ierr;
1620   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1621   Mat_MatTransMultDense *abt = a->abtdense;
1622 
1623   PetscFunctionBegin;
1624   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
1625   ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
1626   ierr = (abt->destroy)(A);CHKERRQ(ierr);
1627   ierr = PetscFree(abt);CHKERRQ(ierr);
1628   PetscFunctionReturn(0);
1629 }
1630 
1631 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1632 {
1633   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1634   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1635   Mat_TransMatMultDense *atb = c->atbdense;
1636   PetscErrorCode ierr;
1637   MPI_Comm       comm;
1638   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1639   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1640   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1641   PetscScalar    _DOne=1.0,_DZero=0.0;
1642   PetscBLASInt   am,an,bn,aN;
1643   const PetscInt *ranges;
1644 
1645   PetscFunctionBegin;
1646   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1647   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1648   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1649 
1650   /* compute atbarray = aseq^T * bseq */
1651   ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
1652   ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
1653   ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
1654   ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
1655   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1656 
1657   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
1658   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1659 
1660   /* arrange atbarray into sendbuf */
1661   k = 0;
1662   for (proc=0; proc<size; proc++) {
1663     for (j=0; j<cN; j++) {
1664       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1665     }
1666   }
1667   /* sum the atbarray contributions from all processes into the local rows of C */
1668   ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
1669   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
1670   ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
1671   PetscFunctionReturn(0);
1672 }
1673 
1674 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
1675 {
1676   PetscErrorCode        ierr;
1677   MPI_Comm              comm;
1678   PetscMPIInt           size;
1679   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
1680   Mat_MPIDense          *c;
1681   Mat_TransMatMultDense *atb;
1682 
1683   PetscFunctionBegin;
1684   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1685   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1686     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1687   }
1688 
1689   /* create matrix product C */
1690   ierr = MatSetSizes(C,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
1691   ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
1692   ierr = MatMPIDenseSetPreallocation(C,NULL);CHKERRQ(ierr);
1693   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1694   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1695 
1696   /* create data structure for reusing C */
1697   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1698   ierr = PetscNew(&atb);CHKERRQ(ierr);
1699   cM = C->rmap->N;
1700   ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);
1701 
1702   c                    = (Mat_MPIDense*)C->data;
1703   c->atbdense          = atb;
1704   atb->destroy         = C->ops->destroy;
1705   C->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
1706   PetscFunctionReturn(0);
1707 }
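
/*
   At the user level the symbolic/numeric pair above is normally reached through
   MatTransposeMatMult(); a minimal sketch, with A and B assumed to be compatible
   MATMPIDENSE matrices:

     Mat C;

     ierr = MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatTransposeMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);

   The first call performs the symbolic phase and attaches the reuse structure created
   above; the second call repeats only the numeric phase with updated values.
*/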
1708 
1709 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
1710 {
1711   PetscErrorCode        ierr;
1712   MPI_Comm              comm;
1713   PetscMPIInt           i, size;
1714   PetscInt              maxRows, bufsiz;
1715   Mat_MPIDense          *c;
1716   PetscMPIInt           tag;
1717   PetscInt              alg;
1718   Mat_MatTransMultDense *abt;
1719   Mat_Product           *product = C->product;
1720   PetscBool             flg;
1721 
1722   PetscFunctionBegin;
1723   /* check local size of A and B */
1724   if (A->cmap->n != B->cmap->n) {
1725     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);
1726   }
1727 
1728   ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
1729   if (flg) {
1730     alg = 0;
1731   } else alg = 1;
1732 
1733   /* setup matrix product C */
1734   ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
1735   ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
1736   ierr = MatMPIDenseSetPreallocation(C,NULL);CHKERRQ(ierr);
1737   ierr = PetscObjectGetNewTag((PetscObject)C, &tag);CHKERRQ(ierr);
1738   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1739   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1740 
1741   /* create data structure for reusing C */
1742   ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
1743   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1744   ierr = PetscNew(&abt);CHKERRQ(ierr);
1745   abt->tag = tag;
1746   abt->alg = alg;
1747   switch (alg) {
1748   case 1: /* alg: "cyclic" */
1749     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
1750     bufsiz = A->cmap->N * maxRows;
1751     ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
1752     break;
1753   default: /* alg: "allgatherv" */
1754     ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
1755     ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
1756     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
1757     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
1758     break;
1759   }
1760 
1761   c                               = (Mat_MPIDense*)C->data;
1762   c->abtdense                     = abt;
1763   abt->destroy                    = C->ops->destroy;
1764   C->ops->destroy                 = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
1765   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
1766   PetscFunctionReturn(0);
1767 }
1768 
1769 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
1770 {
1771   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1772   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1773   Mat_SeqDense   *cseq=(Mat_SeqDense*)(c->A)->data;
1774   Mat_MatTransMultDense *abt = c->abtdense;
1775   PetscErrorCode ierr;
1776   MPI_Comm       comm;
1777   PetscMPIInt    rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
1778   PetscScalar    *sendbuf, *recvbuf=0, *carray;
1779   PetscInt       i,cK=A->cmap->N,k,j,bn;
1780   PetscScalar    _DOne=1.0,_DZero=0.0;
1781   PetscBLASInt   cm, cn, ck;
1782   MPI_Request    reqs[2];
1783   const PetscInt *ranges;
1784 
1785   PetscFunctionBegin;
1786   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1787   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1788   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1789 
1790   ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
1791   bn = B->rmap->n;
1792   if (bseq->lda == bn) {
1793     sendbuf = bseq->v;
1794   } else {
1795     sendbuf = abt->buf[0];
1796     for (k = 0, i = 0; i < cK; i++) {
1797       for (j = 0; j < bn; j++, k++) {
1798         sendbuf[k] = bseq->v[i * bseq->lda + j];
1799       }
1800     }
1801   }
1802   if (size > 1) {
1803     sendto = (rank + size - 1) % size;
1804     recvfrom = (rank + size + 1) % size;
1805   } else {
1806     sendto = recvfrom = 0;
1807   }
1808   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
1809   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
1810   recvisfrom = rank;
1811   for (i = 0; i < size; i++) {
1812     /* we have finished receiving and sending; the buffers can now be read/modified */
1813     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
1814     PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
1815 
1816     if (nextrecvisfrom != rank) {
1817       /* start the cyclic sends from sendbuf to recvbuf (which will become sendbuf in the next iteration) */
1818       sendsiz = cK * bn;
1819       recvsiz = cK * nextbn;
1820       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
1821       ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
1822       ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
1823     }
1824 
1825     /* local aseq * sendbuf^T */
1826     ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
1827     carray = &cseq->v[cseq->lda * ranges[recvisfrom]];
1828     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,sendbuf,&cn,&_DZero,carray,&cseq->lda));
1829 
1830     if (nextrecvisfrom != rank) {
1831       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
1832       ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
1833     }
1834     bn = nextbn;
1835     recvisfrom = nextrecvisfrom;
1836     sendbuf = recvbuf;
1837   }
1838   PetscFunctionReturn(0);
1839 }
1840 
1841 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
1842 {
1843   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1844   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1845   Mat_SeqDense   *cseq=(Mat_SeqDense*)(c->A)->data;
1846   Mat_MatTransMultDense *abt = c->abtdense;
1847   PetscErrorCode ierr;
1848   MPI_Comm       comm;
1849   PetscMPIInt    rank,size;
1850   PetscScalar    *sendbuf, *recvbuf;
1851   PetscInt       i,cK=A->cmap->N,k,j,bn;
1852   PetscScalar    _DOne=1.0,_DZero=0.0;
1853   PetscBLASInt   cm, cn, ck;
1854 
1855   PetscFunctionBegin;
1856   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1857   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1858   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1859 
1860   /* copy transpose of B into buf[0] */
1861   bn      = B->rmap->n;
1862   sendbuf = abt->buf[0];
1863   recvbuf = abt->buf[1];
1864   for (k = 0, j = 0; j < bn; j++) {
1865     for (i = 0; i < cK; i++, k++) {
1866       sendbuf[k] = bseq->v[i * bseq->lda + j];
1867     }
1868   }
1869   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
1870   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
1871   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
1872   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
1873   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,recvbuf,&ck,&_DZero,cseq->v,&cseq->lda));
1874   PetscFunctionReturn(0);
1875 }
1876 
1877 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
1878 {
1879   Mat_MPIDense          *c=(Mat_MPIDense*)C->data;
1880   Mat_MatTransMultDense *abt = c->abtdense;
1881   PetscErrorCode        ierr;
1882 
1883   PetscFunctionBegin;
1884   switch (abt->alg) {
1885   case 1:
1886     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
1887     break;
1888   default:
1889     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
1890     break;
1891   }
1892   PetscFunctionReturn(0);
1893 }
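
/*
   The two kernels dispatched above compute C = A*B^T and are normally reached through
   MatMatTransposeMult(); a minimal sketch, with A and B assumed to be compatible
   MATMPIDENSE matrices:

     Mat C;

     ierr = MatMatTransposeMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);

   The algorithm can be switched at runtime, e.g. with
   -matmattransmult_mpidense_mpidense_via cyclic (the default is allgatherv).
*/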
1894 
1895 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
1896 {
1897   PetscErrorCode   ierr;
1898   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
1899   Mat_MatMultDense *ab = a->abdense;
1900 
1901   PetscFunctionBegin;
1902   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
1903   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
1904   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
1905 
1906   ierr = (ab->destroy)(A);CHKERRQ(ierr);
1907   ierr = PetscFree(ab);CHKERRQ(ierr);
1908   PetscFunctionReturn(0);
1909 }
1910 
1911 #if defined(PETSC_HAVE_ELEMENTAL)
1912 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1913 {
1914   PetscErrorCode   ierr;
1915   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
1916   Mat_MatMultDense *ab=c->abdense;
1917 
1918   PetscFunctionBegin;
1919   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
1920   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
1921   ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
1922   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
1923   PetscFunctionReturn(0);
1924 }
1925 
1926 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
1927 {
1928   PetscErrorCode   ierr;
1929   Mat              Ae,Be,Ce;
1930   Mat_MPIDense     *c;
1931   Mat_MatMultDense *ab;
1932 
1933   PetscFunctionBegin;
1934   /* check local size of A and B */
1935   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
1936     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
1937   }
1938 
1939   /* create elemental matrices Ae and Be */
1940   ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr);
1941   ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1942   ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr);
1943   ierr = MatSetUp(Ae);CHKERRQ(ierr);
1944   ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1945 
1946   ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr);
1947   ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
1948   ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
1949   ierr = MatSetUp(Be);CHKERRQ(ierr);
1950   ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1951 
1952   /* compute symbolic Ce = Ae*Be */
1953   ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr);
1954   ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr);
1955 
1956   /* setup C */
1957   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
1958   ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr);
1959   ierr = MatSetUp(C);CHKERRQ(ierr);
1960 
1961   /* create data structure for reusing the dense C */
1962   ierr = PetscNew(&ab);CHKERRQ(ierr);
1963   c                  = (Mat_MPIDense*)C->data;
1964   c->abdense         = ab;
1965 
1966   ab->Ae             = Ae;
1967   ab->Be             = Be;
1968   ab->Ce             = Ce;
1969   ab->destroy        = C->ops->destroy;
1970   C->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
1971   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
1972   C->ops->productnumeric = MatProductNumeric_AB;
1973   PetscFunctionReturn(0);
1974 }
1975 #endif
1976 /* ----------------------------------------------- */
1977 #if defined(PETSC_HAVE_ELEMENTAL)
1978 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
1979 {
1980   PetscFunctionBegin;
1981   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
1982   C->ops->productsymbolic = MatProductSymbolic_AB;
1983   C->ops->productnumeric  = MatProductNumeric_AB;
1984   PetscFunctionReturn(0);
1985 }
1986 #endif
1987 
1988 static PetscErrorCode MatProductSymbolic_AtB_MPIDense(Mat C)
1989 {
1990   PetscErrorCode ierr;
1991   Mat_Product    *product = C->product;
1992 
1993   PetscFunctionBegin;
1994   ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(product->A,product->B,product->fill,C);CHKERRQ(ierr);
1995   C->ops->productnumeric          = MatProductNumeric_AtB;
1996   C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_MPIDense_MPIDense;
1997   PetscFunctionReturn(0);
1998 }
1999 
2000 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2001 {
2002   Mat_Product *product = C->product;
2003   Mat         A = product->A,B=product->B;
2004 
2005   PetscFunctionBegin;
2006   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2007     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2008 
2009   C->ops->productsymbolic = MatProductSymbolic_AtB_MPIDense;
2010   PetscFunctionReturn(0);
2011 }
2012 
2013 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2014 {
2015   PetscErrorCode ierr;
2016   Mat_Product    *product = C->product;
2017   const char     *algTypes[2] = {"allgatherv","cyclic"};
2018   PetscInt       alg,nalg = 2;
2019   PetscBool      flg = PETSC_FALSE;
2020 
2021   PetscFunctionBegin;
2022   /* Set default algorithm */
2023   alg = 0; /* default is allgatherv */
2024   ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
2025   if (flg) {
2026     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
2027   }
2028 
2029   /* Get runtime option */
2030   if (product->api_user) {
2031     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
2032     ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
2033     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2034   } else {
2035     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
2036     ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
2037     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2038   }
2039   if (flg) {
2040     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
2041   }
2042 
2043   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2044   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2045   C->ops->productnumeric           = MatProductNumeric_ABt;
2046   PetscFunctionReturn(0);
2047 }
2048 
2049 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2050 {
2051   PetscErrorCode ierr;
2052   Mat_Product    *product = C->product;
2053 
2054   PetscFunctionBegin;
2055   switch (product->type) {
2056 #if defined(PETSC_HAVE_ELEMENTAL)
2057   case MATPRODUCT_AB:
2058     ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
2059     break;
2060 #endif
2061   case MATPRODUCT_AtB:
2062     ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
2063     break;
2064   case MATPRODUCT_ABt:
2065     ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
2066     break;
2067   default: SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type is not supported");
2068   }
2069   PetscFunctionReturn(0);
2070 }
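
/*
   A minimal sketch of driving the dispatch above through the MatProduct interface; A and B
   are assumed to be existing MATMPIDENSE matrices:

     Mat C;

     ierr = MatProductCreate(A,B,NULL,&C);CHKERRQ(ierr);
     ierr = MatProductSetType(C,MATPRODUCT_AtB);CHKERRQ(ierr);
     ierr = MatProductSetFromOptions(C);CHKERRQ(ierr);
     ierr = MatProductSymbolic(C);CHKERRQ(ierr);
     ierr = MatProductNumeric(C);CHKERRQ(ierr);

   MATPRODUCT_AB (with Elemental) and MATPRODUCT_ABt follow the same pattern through the
   cases handled above.
*/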
2071