/*
   Basic functions for parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@
   MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the inner
   sequential matrix that stores the rows owned by this process. For a
   MATSEQDENSE matrix, the matrix itself is returned.

   Input Parameter:
.  A - the sequential or MPI dense matrix

   Output Parameter:
.  B - the inner matrix

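   Example Usage:
   A minimal sketch; A is assumed to have been created elsewhere as a
   MATMPIDENSE (or MATSEQDENSE) matrix:
.vb
   Mat Aloc;
   ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
   /* Aloc references the process-local dense block of A; it is not a copy,
      so do not destroy it */
.ve
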
   Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetArray_MPIDense"
PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;

  PetscFunctionBegin;
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only a submatrix with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDenseRestoreArray_MPIDense"
PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* The code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *sizes,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /*  first count number of contributors to each processor */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m   = ll->lda, i;

    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v += m; }
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v  = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place");
  if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPIDense"
PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIDense"
PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIDense"
PetscErrorCode  MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIDense"
PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIDense"
PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIDense"
PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetRandom_MPIDense"
static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;
  PetscScalar    *a;
  PetscInt       m,n,i;

  PetscFunctionBegin;
  ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
  ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
  for (i=0; i<m*n; i++) {
    ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);

#undef __FUNCT__
#define __FUNCT__ "MatMissingDiagonal_MPIDense"
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatGetSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatGetSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
#if defined(PETSC_HAVE_ELEMENTAL)
                                /* 89*/ MatMatMult_MPIDense_MPIDense,
                                        MatMatMultSymbolic_MPIDense_MPIDense,
#else
                                /* 89*/ 0,
                                        0,
#endif
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 99*/ 0,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        MatTransposeMatMult_MPIDense_MPIDense,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0
};

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: for now, when data is specified, this assumes the user has correctly
     allocated the local dense storage space.  We should add error checking. */

  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
#undef __FUNCT__
#define __FUNCT__ "MatConvert_MPIDense_Elemental"
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* The PETSc-Elemental interface uses axpy for setting off-processor entries, so only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off-process entries formed during MatSetValues() */
1344   a->donotstash = PETSC_FALSE;
1345 
1346   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1347 
1348   /* stuff used for matrix vector multiply */
1349   a->lvec        = 0;
1350   a->Mvctx       = 0;
1351   a->roworiented = PETSC_TRUE;
1352 
1353   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1354   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1355 #if defined(PETSC_HAVE_ELEMENTAL)
1356   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
1357 #endif
1358   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1359   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1360   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1361   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1362 
1363   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1364   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1365   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1366   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1367   PetscFunctionReturn(0);
1368 }
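/*
   Dispatch sketch (illustrative; ExampleQueryPreallocation is hypothetical):
   the "..._C" methods composed above are looked up by string name at run time.
   This shows the lookup pattern that PetscTryMethod() performs internally; a
   type that has not composed the method is simply skipped.
*/
#if 0
static PetscErrorCode ExampleQueryPreallocation(Mat mat)
{
  PetscErrorCode (*f)(Mat,PetscScalar*);
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",&f);CHKERRQ(ierr);
  if (f) {ierr = (*f)(mat,NULL);CHKERRQ(ierr);}  /* composed only for MATMPIDENSE */
  PetscFunctionReturn(0);
}
#endif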
1369 
1370 /*MC
1371    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1372 
1373    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1374    and MATMPIDENSE otherwise.
1375 
1376    Options Database Keys:
1377 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1378 
1379   Level: beginner
1380 
1381 
1382 .seealso: MatCreateDense(), MATSEQDENSE, MATMPIDENSE
1383 M*/
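/*
   Usage sketch (the global sizes 100 x 100 are assumptions for illustration;
   ExampleCreateFromOptions is hypothetical): selecting the type at run time so
   the options database can resolve "dense" to MATSEQDENSE or MATMPIDENSE.
*/
#if 0
static PetscErrorCode ExampleCreateFromOptions(Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(PETSC_COMM_WORLD,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
  ierr = MatSetFromOptions(*A);CHKERRQ(ierr);   /* honors -mat_type dense */
  ierr = MatSetUp(*A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif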
1384 
1385 #undef __FUNCT__
1386 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1387 /*@C
1388    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1389 
1390    Not collective
1391 
1392    Input Parameters:
1393 +  B - the matrix
1394 -  data - optional location of matrix data.  Set data=NULL for PETSc
1395    to control all matrix memory allocation.
1396 
1397    Notes:
1398    The dense format is fully compatible with standard Fortran 77
1399    storage by columns.
1400 
1401    The data input variable is intended primarily for Fortran programmers
1402    who wish to allocate their own matrix memory space.  Most users should
1403    set data=NULL.
1404 
1405    Level: intermediate
1406 
1407 .keywords: matrix,dense, parallel
1408 
1409 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1410 @*/
1411 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1412 {
1413   PetscErrorCode ierr;
1414 
1415   PetscFunctionBegin;
1416   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
1417   PetscFunctionReturn(0);
1418 }
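/*
   Usage sketch (ExampleSetUpDense is hypothetical): the usual call sequence,
   with data=NULL so PETSc allocates the local column-major block itself.
*/
#if 0
static PetscErrorCode ExampleSetUpDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*B,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*B,NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif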
1419 
1420 #undef __FUNCT__
1421 #define __FUNCT__ "MatCreateDense"
1422 /*@C
1423    MatCreateDense - Creates a parallel matrix in dense format.
1424 
1425    Collective on MPI_Comm
1426 
1427    Input Parameters:
1428 +  comm - MPI communicator
1429 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1430 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1431 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1432 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1433 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1434    to control all matrix memory allocation.
1435 
1436    Output Parameter:
1437 .  A - the matrix
1438 
1439    Notes:
1440    The dense format is fully compatible with standard Fortran 77
1441    storage by columns.
1442 
1443    The data input variable is intended primarily for Fortran programmers
1444    who wish to allocate their own matrix memory space.  Most users should
1445    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1446 
1447    The user MUST specify either the local or global matrix dimensions
1448    (possibly both).
1449 
1450    Level: intermediate
1451 
1452 .keywords: matrix,dense, parallel
1453 
1454 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1455 @*/
1456 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1457 {
1458   PetscErrorCode ierr;
1459   PetscMPIInt    size;
1460 
1461   PetscFunctionBegin;
1462   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1463   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1464   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1465   if (size > 1) {
1466     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1467     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1468     if (data) {  /* user provided data array, so no need to assemble */
1469       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
1470       (*A)->assembled = PETSC_TRUE;
1471     }
1472   } else {
1473     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1474     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1475   }
1476   PetscFunctionReturn(0);
1477 }
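/*
   Usage sketch (the 10 x 10 global size and the inserted value are assumptions;
   ExampleUseCreateDense is hypothetical): creating a distributed dense matrix
   with PETSc-managed storage and assembling it.
*/
#if 0
static PetscErrorCode ExampleUseCreateDense(void)
{
  Mat            A;
  PetscScalar    one = 1.0;
  PetscInt       row = 0,col = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,10,10,NULL,&A);CHKERRQ(ierr);
  ierr = MatSetValues(A,1,&row,1,&col,&one,INSERT_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDestroy(&A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif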
1478 
1479 #undef __FUNCT__
1480 #define __FUNCT__ "MatDuplicate_MPIDense"
1481 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1482 {
1483   Mat            mat;
1484   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1485   PetscErrorCode ierr;
1486 
1487   PetscFunctionBegin;
1488   *newmat = 0;
1489   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1490   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1491   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1492   a       = (Mat_MPIDense*)mat->data;
1493   ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1494 
1495   mat->factortype   = A->factortype;
1496   mat->assembled    = PETSC_TRUE;
1497   mat->preallocated = PETSC_TRUE;
1498 
1499   a->size         = oldmat->size;
1500   a->rank         = oldmat->rank;
1501   mat->insertmode = NOT_SET_VALUES;
1502   a->nvec         = oldmat->nvec;
1503   a->donotstash   = oldmat->donotstash;
1504 
1505   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1506   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1507 
1508   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1509   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1510   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1511 
1512   *newmat = mat;
1513   PetscFunctionReturn(0);
1514 }
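/*
   Usage sketch (ExampleDuplicate is hypothetical): MatDuplicate() dispatches to
   the routine above for MATMPIDENSE; MAT_COPY_VALUES copies the entries as
   well, MAT_DO_NOT_COPY_VALUES only the layout.
*/
#if 0
static PetscErrorCode ExampleDuplicate(Mat A,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif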
1515 
1516 #undef __FUNCT__
1517 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1518 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
1519 {
1520   PetscErrorCode ierr;
1521   PetscMPIInt    rank,size;
1522   const PetscInt *rowners;
1523   PetscInt       i,m,n,nz,j,mMax;
1524   PetscScalar    *array,*vals,*vals_ptr;
1525   Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;
1526 
1527   PetscFunctionBegin;
1528   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1529   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1530 
1531   /* determine ownership of rows and columns */
1532   m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
1533   n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;
1534 
1535   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1536   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1537     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1538   }
1539   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
1540   ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
1541   ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
1542   ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
1543   if (!rank) {
1544     ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);
1545 
1546     /* read in my part of the matrix numerical values  */
1547     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1548 
1549     /* insert into the matrix row by row (this is why we cannot read directly into the column-major array) */
1550     vals_ptr = vals;
1551     for (i=0; i<m; i++) {
1552       for (j=0; j<N; j++) {
1553         array[i + j*m] = *vals_ptr++;
1554       }
1555     }
1556 
1557     /* read in the other processors' parts and ship them out */
1558     for (i=1; i<size; i++) {
1559       nz   = (rowners[i+1] - rowners[i])*N;
1560       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1561       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1562     }
1563   } else {
1564     /* receive numeric values */
1565     ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);
1566 
1567     /* receive message of values */
1568     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1569 
1570     /* insert into the matrix row by row (this is why we cannot read directly into the column-major array) */
1571     vals_ptr = vals;
1572     for (i=0; i<m; i++) {
1573       for (j=0; j<N; j++) {
1574         array[i + j*m] = *vals_ptr++;
1575       }
1576     }
1577   }
1578   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
1579   ierr = PetscFree(vals);CHKERRQ(ierr);
1580   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1581   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1582   PetscFunctionReturn(0);
1583 }
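/*
   Layout note (worked example, not in the original source): the file stores a
   process's rows one row at a time, while the local dense array is column major
   with leading dimension m, hence the array[i + j*m] indexing above.  For
   m = 2 and N = 3 the file order v0 v1 v2 / v3 v4 v5 ends up in memory as
   v0 v3 v1 v4 v2 v5, i.e. one column after another.
*/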
1584 
1585 #undef __FUNCT__
1586 #define __FUNCT__ "MatLoad_MPIDense"
1587 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1588 {
1589   Mat_MPIDense   *a;
1590   PetscScalar    *vals,*svals;
1591   MPI_Comm       comm;
1592   MPI_Status     status;
1593   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
1594   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1595   PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1596   PetscInt       i,nz,j,rstart,rend;
1597   int            fd;
1598   PetscErrorCode ierr;
1599 
1600   PetscFunctionBegin;
1601   /* force binary viewer to load .info file if it has not yet done so */
1602   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1603   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
1604   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1605   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1606   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1607   if (!rank) {
1608     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
1609     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1610   }
1611   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1612   M    = header[1]; N = header[2]; nz = header[3];
1613 
1614   /* If the global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
1615   if (newmat->rmap->N < 0) newmat->rmap->N = M;
1616   if (newmat->cmap->N < 0) newmat->cmap->N = N;
1617 
1618   if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
1619   if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);
1620 
1621   /*
1622        Handle case where matrix is stored on disk as a dense matrix
1623   */
1624   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1625     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1626     PetscFunctionReturn(0);
1627   }
1628 
1629   /* determine ownership of all rows */
1630   if (newmat->rmap->n < 0) {
1631     ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
1632   } else {
1633     ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
1634   }
1635   if (newmat->cmap->n < 0) {
1636     n = PETSC_DECIDE;
1637   } else {
1638     ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
1639   }
1640 
1641   ierr       = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
1642   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1643   rowners[0] = 0;
1644   for (i=2; i<=size; i++) {
1645     rowners[i] += rowners[i-1];
1646   }
1647   rstart = rowners[rank];
1648   rend   = rowners[rank+1];
1649 
1650   /* distribute row lengths to all processors */
1651   ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
1652   if (!rank) {
1653     ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
1654     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1655     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
1656     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1657     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1658     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1659   } else {
1660     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1661   }
1662 
1663   if (!rank) {
1664     /* calculate the number of nonzeros on each processor */
1665     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
1666     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1667     for (i=0; i<size; i++) {
1668       for (j=rowners[i]; j< rowners[i+1]; j++) {
1669         procsnz[i] += rowlengths[j];
1670       }
1671     }
1672     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1673 
1674     /* determine max buffer needed and allocate it */
1675     maxnz = 0;
1676     for (i=0; i<size; i++) {
1677       maxnz = PetscMax(maxnz,procsnz[i]);
1678     }
1679     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
1680 
1681     /* read in my part of the matrix column indices  */
1682     nz   = procsnz[0];
1683     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
1684     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1685 
1686     /* read in everyone else's part and ship it off */
1687     for (i=1; i<size; i++) {
1688       nz   = procsnz[i];
1689       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1690       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1691     }
1692     ierr = PetscFree(cols);CHKERRQ(ierr);
1693   } else {
1694     /* determine buffer space needed for message */
1695     nz = 0;
1696     for (i=0; i<m; i++) {
1697       nz += ourlens[i];
1698     }
1699     ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);
1700 
1701     /* receive message of column indices */
1702     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1703     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1704     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1705   }
1706 
1707   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1708   a = (Mat_MPIDense*)newmat->data;
1709   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1710     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1711   }
1712 
1713   if (!rank) {
1714     ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);
1715 
1716     /* read in my part of the matrix numerical values  */
1717     nz   = procsnz[0];
1718     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1719 
1720     /* insert into matrix */
1721     jj      = rstart;
1722     smycols = mycols;
1723     svals   = vals;
1724     for (i=0; i<m; i++) {
1725       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1726       smycols += ourlens[i];
1727       svals   += ourlens[i];
1728       jj++;
1729     }
1730 
1731     /* read in the other processors' parts and ship them out */
1732     for (i=1; i<size; i++) {
1733       nz   = procsnz[i];
1734       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1735       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
1736     }
1737     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1738   } else {
1739     /* receive numeric values */
1740     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
1741 
1742     /* receive message of values*/
1743     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
1744     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1745     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1746 
1747     /* insert into matrix */
1748     jj      = rstart;
1749     smycols = mycols;
1750     svals   = vals;
1751     for (i=0; i<m; i++) {
1752       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1753       smycols += ourlens[i];
1754       svals   += ourlens[i];
1755       jj++;
1756     }
1757   }
1758   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1759   ierr = PetscFree(vals);CHKERRQ(ierr);
1760   ierr = PetscFree(mycols);CHKERRQ(ierr);
1761   ierr = PetscFree(rowners);CHKERRQ(ierr);
1762 
1763   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1764   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1765   PetscFunctionReturn(0);
1766 }
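/*
   Usage sketch ("matrix.dat" is a placeholder file name; ExampleLoadDense is
   hypothetical): loading a matrix stored in PETSc binary format into a
   MATMPIDENSE matrix via MatLoad(), which dispatches to the routine above.
*/
#if 0
static PetscErrorCode ExampleLoadDense(Mat *A)
{
  PetscViewer    viewer;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_WORLD,A);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatLoad(*A,viewer);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif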
1767 
1768 #undef __FUNCT__
1769 #define __FUNCT__ "MatEqual_MPIDense"
1770 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1771 {
1772   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1773   Mat            a,b;
1774   PetscBool      flg;
1775   PetscErrorCode ierr;
1776 
1777   PetscFunctionBegin;
1778   a    = matA->A;
1779   b    = matB->A;
1780   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1781   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1782   PetscFunctionReturn(0);
1783 }
1784 
1785 #undef __FUNCT__
1786 #define __FUNCT__ "MatDestroy_MatTransMatMult_MPIDense_MPIDense"
1787 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1788 {
1789   PetscErrorCode        ierr;
1790   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1791   Mat_TransMatMultDense *atb = a->atbdense;
1792 
1793   PetscFunctionBegin;
1794   ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
1795   ierr = (atb->destroy)(A);CHKERRQ(ierr);
1796   ierr = PetscFree(atb);CHKERRQ(ierr);
1797   PetscFunctionReturn(0);
1798 }
1799 
1800 #undef __FUNCT__
1801 #define __FUNCT__ "MatTransposeMatMultNumeric_MPIDense_MPIDense"
1802 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1803 {
1804   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1805   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1806   Mat_TransMatMultDense *atb = c->atbdense;
1807   PetscErrorCode ierr;
1808   MPI_Comm       comm;
1809   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1810   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1811   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1812   PetscScalar    _DOne=1.0,_DZero=0.0;
1813   PetscBLASInt   am,an,bn,aN;
1814   const PetscInt *ranges;
1815 
1816   PetscFunctionBegin;
1817   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1818   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1819   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1820 
1821   /* compute atbarray = aseq^T * bseq */
1822   ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
1823   ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
1824   ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
1825   ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
1826   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1827 
1828   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
1829   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1830 
1831   /* arrange atbarray into sendbuf */
1832   k = 0;
1833   for (proc=0; proc<size; proc++) {
1834     for (j=0; j<cN; j++) {
1835       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1836     }
1837   }
1838   /* sum the atbarray contributions from all ranks into the local values of C */
1839   ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
1840   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
1841   ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
1842   PetscFunctionReturn(0);
1843 }
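/*
   Communication note (illustrative): every rank computes a full cM x cN
   contribution A_loc^T B_loc, so C is the sum of these contributions over all
   ranks.  Packing sendbuf by destination rank lets a single MPI_Reduce_scatter
   both sum the contributions and deliver to each rank exactly its rows
   ranges[rank] .. ranges[rank+1]-1 of C, avoiding an MPI_Allreduce of the
   whole cM x cN array followed by a local copy.
*/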
1844 
1845 #undef __FUNCT__
1846 #define __FUNCT__ "MatTransposeMatMultSymbolic_MPIDense_MPIDense"
1847 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1848 {
1849   PetscErrorCode        ierr;
1850   Mat                   Cdense;
1851   MPI_Comm              comm;
1852   PetscMPIInt           size;
1853   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
1854   Mat_MPIDense          *c;
1855   Mat_TransMatMultDense *atb;
1856 
1857   PetscFunctionBegin;
1858   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1859   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1860     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1861   }
1862 
1863   /* create matrix product Cdense */
1864   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
1865   ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
1866   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
1867   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
1868   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1869   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1870   *C   = Cdense;
1871 
1872   /* create data structure for reusing Cdense */
1873   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1874   ierr = PetscNew(&atb);CHKERRQ(ierr);
1875   cM = Cdense->rmap->N;
1876   ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);
1877 
1878   c                    = (Mat_MPIDense*)Cdense->data;
1879   c->atbdense          = atb;
1880   atb->destroy         = Cdense->ops->destroy;
1881   Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
1882   PetscFunctionReturn(0);
1883 }
1884 
1885 #undef __FUNCT__
1886 #define __FUNCT__ "MatTransposeMatMult_MPIDense_MPIDense"
1887 PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1888 {
1889   PetscErrorCode ierr;
1890 
1891   PetscFunctionBegin;
1892   if (scall == MAT_INITIAL_MATRIX) {
1893     ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1894   }
1895   ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1896   PetscFunctionReturn(0);
1897 }
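/*
   Usage sketch (ExampleAtB is hypothetical; the fill argument is unused by the
   dense implementation above): forming C = A^T B once, then recomputing it in
   place after the entries of A or B change.
*/
#if 0
static PetscErrorCode ExampleAtB(Mat A,Mat B,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,C);CHKERRQ(ierr);
  /* ... update entries of A or B ... */
  ierr = MatTransposeMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif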
1898 
1899 #undef __FUNCT__
1900 #define __FUNCT__ "MatDestroy_MatMatMult_MPIDense_MPIDense"
1901 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
1902 {
1903   PetscErrorCode   ierr;
1904   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
1905   Mat_MatMultDense *ab = a->abdense;
1906 
1907   PetscFunctionBegin;
1908   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
1909   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
1910   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
1911 
1912   ierr = (ab->destroy)(A);CHKERRQ(ierr);
1913   ierr = PetscFree(ab);CHKERRQ(ierr);
1914   PetscFunctionReturn(0);
1915 }
1916 
1917 #if defined(PETSC_HAVE_ELEMENTAL)
1918 #undef __FUNCT__
1919 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
1920 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1921 {
1922   PetscErrorCode   ierr;
1923   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
1924   Mat_MatMultDense *ab=c->abdense;
1925 
1926   PetscFunctionBegin;
1927   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
1928   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
1929   ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
1930   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
1931   PetscFunctionReturn(0);
1932 }
1933 
1934 #undef __FUNCT__
1935 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
1936 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1937 {
1938   PetscErrorCode   ierr;
1939   Mat              Ae,Be,Ce;
1940   Mat_MPIDense     *c;
1941   Mat_MatMultDense *ab;
1942 
1943   PetscFunctionBegin;
1944   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
1945     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
1946   }
1947 
1948   /* convert A and B to Elemental matrices Ae and Be */
1949   ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr);
1950   ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr);
1951 
1952   /* Ce = Ae*Be */
1953   ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
1954   ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);
1955 
1956   /* convert Ce to C */
1957   ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
1958 
1959   /* create data structure for reusing C */
1960   ierr = PetscNew(&ab);CHKERRQ(ierr);
1961   c                  = (Mat_MPIDense*)(*C)->data;
1962   c->abdense         = ab;
1963 
1964   ab->Ae             = Ae;
1965   ab->Be             = Be;
1966   ab->Ce             = Ce;
1967   ab->destroy        = (*C)->ops->destroy;
1968   (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
1969   (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
1970   PetscFunctionReturn(0);
1971 }
1972 
1973 #undef __FUNCT__
1974 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
1975 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1976 {
1977   PetscErrorCode ierr;
1978 
1979   PetscFunctionBegin;
1980   if (scall == MAT_INITIAL_MATRIX) { /* symbolic product includes the numeric product */
1981     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
1982     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1983     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
1984   } else {
1985     ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
1986     ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1987     ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
1988   }
1989   PetscFunctionReturn(0);
1990 }
1991 #endif
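/*
   Usage sketch (requires a PETSc build configured with Elemental, matching the
   guard above; ExampleAB is hypothetical): dense-dense products go through the
   Elemental-backed routines registered here.
*/
#if 0
static PetscErrorCode ExampleAB(Mat A,Mat B,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,C);CHKERRQ(ierr);
  ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,C);CHKERRQ(ierr);   /* recompute in place */
  ierr = MatDestroy(C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif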
1992