/*
   Basic functions for parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
   MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
   matrix that represents the locally owned portion of the operator. For a sequential
   matrix it returns the matrix itself.

   Input Parameter:
.  A - the Seq or MPI dense matrix

   Output Parameter:
.  B - the inner matrix

   Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}
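
/*
   Example usage (a minimal sketch; A and ierr are assumed to exist in the
   caller): extract the local dense block of a parallel dense matrix and query
   its dimensions. The local block has the local number of rows of A and the
   global number of columns.

     Mat      B;
     PetscInt m,n;
     ierr = MatDenseGetLocalMatrix(A,&B);CHKERRQ(ierr);
     ierr = MatGetSize(B,&m,&n);CHKERRQ(ierr);
*/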

PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
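
/*
   Example usage (a minimal sketch, through the public interface): rows may
   only be queried on the process that owns them, and each MatGetRow() must be
   paired with a MatRestoreRow(). Here rstart is assumed to be the first
   locally owned row, e.g. from MatGetOwnershipRange().

     PetscInt          ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetRow(mat,rstart,&ncols,&cols,&vals);CHKERRQ(ierr);
     ierr = MatRestoreRow(mat,rstart,&ncols,&cols,&vals);CHKERRQ(ierr);
*/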

PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
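
/*
   Example usage (a minimal sketch): the diagonal block is composed with the
   matrix and returned as a borrowed reference, so the caller must not
   destroy it.

     Mat a;
     ierr = MatGetDiagonalBlock(A,&a);CHKERRQ(ierr);
     ierr = MatView(a,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
*/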

PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
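
/*
   Example usage (a minimal sketch): entries destined for rows owned by
   another process are stashed above and communicated during assembly, so
   inserting with global indices is legal as long as it is followed by
   MatAssemblyBegin/End(). The indices i and j are assumed global indices,
   possibly off-process.

     PetscScalar v = 1.0;
     ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/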

PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;

  PetscFunctionBegin;
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. Currently this supports only a submatrix with the same
     row partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
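
/*
   Example usage (a minimal sketch): extract the locally owned rows and
   columns into a new matrix. The variables rstart/rend and cstart/cend are
   assumed to come from MatGetOwnershipRange() and
   MatGetOwnershipRangeColumn(); as noted above, the submatrix must keep the
   row partitioning of the original matrix.

     IS  isrow,iscol;
     Mat B;
     ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,cend-cstart,cstart,1,&iscol);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
*/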

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
  ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *sizes,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /* first count number of contributors to each processor */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m   = ll->lda, i;

    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
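
/*
   Example usage (a minimal sketch): zero the rows listed in rows[] (global
   indices), place 2.0 on their diagonals, and adjust the right-hand side b so
   that the solution keeps the values stored in x for those rows. The
   variables rows, nrows, x and b are assumed to be set up by the caller.

     ierr = MatZeroRows(A,nrows,rows,2.0,x,b);CHKERRQ(ierr);
*/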

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}
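
/*
   Example usage (a minimal sketch): storing a parallel dense matrix in a
   binary file requires the native format, as the error message above
   indicates.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"dense.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
     ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/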

extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
  const PetscScalar *l,*r;
  PetscScalar       x,*v;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v += m;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v  = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports in-place transpose only for square matrices");
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode  MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;
  PetscScalar    *a;
  PetscInt       m,n,i;

  PetscFunctionBegin;
  ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
  ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
  for (i=0; i<m*n; i++) {
    ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);

static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
#if defined(PETSC_HAVE_ELEMENTAL)
                                /* 89*/ MatMatMult_MPIDense_MPIDense,
                                        MatMatMultSymbolic_MPIDense_MPIDense,
#else
                                /* 89*/ 0,
                                        0,
#endif
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 99*/ 0,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        MatTransposeMatMult_MPIDense_MPIDense,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0
};

PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: for now, when data is specified, we assume the user has correctly
     allocated the local dense storage space. We should add error checking. */

  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* The PETSc-Elemental interface uses axpy for setting off-processor entries, so only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off-processor entries */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix-vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1320 
1321 /*MC
1322    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1323 
1324    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1325    and MATMPIDENSE otherwise.
1326 
1327    Options Database Keys:
1328 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1329 
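   Example (a minimal sketch of runtime type selection; the sizes are illustrative):
.vb
      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
      ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);   /* or choose it at runtime with -mat_type dense */
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);
      ierr = MatSetUp(A);CHKERRQ(ierr);
.ve
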
  Level: beginner

.seealso: MatCreateDense(), MATSEQDENSE, MATMPIDENSE
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Collective on Mat

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

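   Example (a minimal sketch, assuming the local sizes m,n and global sizes M,N were chosen
   beforehand; the buffer mydata is hypothetical and must hold the local m-by-N block in
   column-major order):
.vb
      Mat         B;
      PetscScalar *mydata;        /* hypothetical user-managed buffer of m*N entries */
      ierr = MatCreate(comm,&B);CHKERRQ(ierr);
      ierr = MatSetSizes(B,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(B,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(B,mydata);CHKERRQ(ierr);
.ve
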
   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix.

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.
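
   Example (a minimal sketch; myarray is a hypothetical user buffer holding the local
   m-by-N block in column-major order):
.vb
      ierr = MatDensePlaceArray(mat,myarray);CHKERRQ(ierr);
      /* ... operations on mat now read and write myarray ... */
      ierr = MatDenseResetArray(mat);CHKERRQ(ierr);   /* return to the original array */
.ve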

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar array[])
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDenseResetArray - Resets the matrix array to the array it held before the call to MatDensePlaceArray()

   Not Collective

   Input Parameter:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode  MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatCreateDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).
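
   Example (a minimal sketch, assuming the global sizes M and N were chosen beforehand;
   PETSc picks the parallel layout and allocates the storage):
.vb
      Mat A;
      ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);CHKERRQ(ierr);
.ve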

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) {  /* user-provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;

  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  const PetscInt *rowners;
  PetscInt       i,m,n,nz,j,mMax;
  PetscScalar    *array,*vals,*vals_ptr;
  Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of rows and columns */
  m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
  n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;

  ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
  if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }
  ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
  ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
  ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row: the file stores values by rows, while the dense array is column-major, which is why we cannot read directly into it */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);

    /* receive the message of values */
    ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);

    /* insert into the matrix row by row: the file stores values by rows, while the dense array is column-major, which is why we cannot read directly into it */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
{
  Mat_MPIDense   *a;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
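  /*
     Layout of the binary file as read below: a four-entry header (classid, M, N, nz);
     for the sparse format this is followed by the per-row counts, then all column
     indices, then all values. nz == MATRIX_BINARY_FORMAT_DENSE instead marks a matrix
     stored on disk as a dense array.
  */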
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1]; N = header[2]; nz = header[3];

  /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
  if (newmat->rmap->N < 0) newmat->rmap->N = M;
  if (newmat->cmap->N < 0) newmat->cmap->N = N;

  if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
  if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) {
    ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
  } else {
    ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
  }
  if (newmat->cmap->n < 0) {
    n = PETSC_DECIDE;
  } else {
    ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
  }

  ierr       = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's parts and ship them off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);

    /* receive the message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
  a = (Mat_MPIDense*)newmat->data;
  if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  if (!rank) {
    ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);

    /* receive the message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
  Mat_TransMatMultDense *atb = a->atbdense;

  PetscFunctionBegin;
  ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
  ierr = (atb->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_TransMatMultDense *atb = c->atbdense;
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
  PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
  PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  PetscScalar    _DOne=1.0,_DZero=0.0;
  PetscBLASInt   am,an,bn,aN;
  const PetscInt *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

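  /*
     Algorithm: every rank forms the full cM-by-cN product of its local blocks,
     A_loc^T * B_loc; these rank-local contributions are then summed, and each rank
     receives its row block of C, in one MPI_Reduce_scatter below.
  */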
  /* compute atbarray = aseq^T * bseq */
  ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
  PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  /* arrange atbarray into sendbuf */
  k = 0;
  for (proc=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  /* sum the per-rank contributions into the local rows of C */
  ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode        ierr;
  Mat                   Cdense;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_MPIDense          *c;
  Mat_TransMatMultDense *atb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product Cdense */
  ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
  ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C   = Cdense;

1860   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1861   ierr = PetscNew(&atb);CHKERRQ(ierr);
1862   cM = Cdense->rmap->N;
1863   ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);
1864 
1865   c                    = (Mat_MPIDense*)Cdense->data;
1866   c->atbdense          = atb;
1867   atb->destroy         = Cdense->ops->destroy;
1868   Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
1869   PetscFunctionReturn(0);
1870 }
1871 

PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
  Mat_MatMultDense *ab = a->abdense;

  PetscFunctionBegin;
  ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);

  ierr = (ab->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(ab);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#if defined(PETSC_HAVE_ELEMENTAL)
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
  Mat_MatMultDense *ab=c->abdense;

  PetscFunctionBegin;
  ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX,&ab->Ae);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX,&ab->Be);CHKERRQ(ierr);
  ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
  ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode   ierr;
  Mat              Ae,Be,Ce;
  Mat_MPIDense     *c;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* convert A and B to Elemental matrices Ae and Be */
  ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX,&Ae);CHKERRQ(ierr);
  ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX,&Be);CHKERRQ(ierr);

  /* Ce = Ae*Be */
  ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
  ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);

  /* convert Ce to C */
  ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);

  /* create the data structure used to reuse the product C */
  ierr = PetscNew(&ab);CHKERRQ(ierr);
  c          = (Mat_MPIDense*)(*C)->data;
  c->abdense = ab;

  ab->Ae                    = Ae;
  ab->Be                    = Be;
  ab->Ce                    = Ce;
  ab->destroy               = (*C)->ops->destroy;
  (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
  (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) { /* the symbolic product includes the numeric product */
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  } else {
    ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif