xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision eb91f3213a4c4937d73f38b7a5b345eff80215e5)
1 
/*
   Basic functions for parallel dense matrices.
*/
5 
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 
11 /*@
12 
13       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
14               matrix that represents the operator. For sequential matrices it returns itself.
15 
16     Input Parameter:
17 .      A - the Seq or MPI dense matrix
18 
19     Output Parameter:
20 .      B - the inner matrix
21 
22     Level: intermediate
23 
24 @*/
25 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
26 {
27   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
28   PetscErrorCode ierr;
29   PetscBool      flg;
30 
31   PetscFunctionBegin;
32   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
33   if (flg) *B = mat->A;
34   else *B = A;
35   PetscFunctionReturn(0);
36 }
37 
38 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
39 {
40   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
41   PetscErrorCode ierr;
42   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
43 
44   PetscFunctionBegin;
45   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
46   lrow = row - rstart;
47   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
48   PetscFunctionReturn(0);
49 }
50 
51 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
52 {
53   PetscErrorCode ierr;
54 
55   PetscFunctionBegin;
56   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
57   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
58   PetscFunctionReturn(0);
59 }
60 
/*
   Returns the square diagonal block of A (local rows crossed with the matching
   global columns) as a sequential dense matrix.  The block is built lazily,
   cached on A under the key "DiagonalBlock", and aliases the storage of the
   local matrix mdn->A -- no values are copied.
*/
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    /* array+m*rstart is column rstart of the local array; assumes the leading
       dimension of the local storage equals m -- TODO confirm for user-supplied arrays */
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* the compose takes its own reference; drop ours so A alone keeps B alive */
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
90 
/*
   Inserts or adds a logically dense block of values.  Locally owned rows go
   straight into the local sequential matrix; off-process rows are buffered in
   the stash and communicated during assembly (unless stashing is disabled).
*/
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are silently skipped */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* row-major input: row i of the block occupies v[i*n .. i*n+n-1] */
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* column-major input: element (i,j) of the block is v[i+j*m] */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      /* off-process row: stash it for MatAssemblyBegin/End to communicate */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
124 
125 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
126 {
127   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
128   PetscErrorCode ierr;
129   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
130 
131   PetscFunctionBegin;
132   for (i=0; i<m; i++) {
133     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
134     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
135     if (idxm[i] >= rstart && idxm[i] < rend) {
136       row = idxm[i] - rstart;
137       for (j=0; j<n; j++) {
138         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
139         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
140         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
141       }
142     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
143   }
144   PetscFunctionReturn(0);
145 }
146 
147 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
148 {
149   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
150   PetscErrorCode ierr;
151 
152   PetscFunctionBegin;
153   ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
154   PetscFunctionReturn(0);
155 }
156 
157 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
158 {
159   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
160   PetscErrorCode ierr;
161 
162   PetscFunctionBegin;
163   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
164   PetscFunctionReturn(0);
165 }
166 
167 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
168 {
169   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
170   PetscErrorCode ierr;
171 
172   PetscFunctionBegin;
173   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
174   PetscFunctionReturn(0);
175 }
176 
177 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
178 {
179   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
180   PetscErrorCode ierr;
181 
182   PetscFunctionBegin;
183   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
184   PetscFunctionReturn(0);
185 }
186 
187 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
188 {
189   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
190   PetscErrorCode ierr;
191 
192   PetscFunctionBegin;
193   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
194   PetscFunctionReturn(0);
195 }
196 
/*
   Extracts the submatrix A[isrow,iscol] into *B.  The column index set is
   gathered globally (every rank receives all selected columns); the rows must
   follow the original row partitioning -- no redistribution is performed.
   Values are copied directly between the raw dense arrays, column by column.
*/
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;
  MPI_Comm       comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  /* NOTE(review): bv walks the destination contiguously, which assumes the
     destination leading dimension equals nrows -- confirm for reused matrices */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    /* av points at source column icol[i]; lda is the source leading dimension */
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
263 
264 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
265 {
266   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
267   PetscErrorCode ierr;
268 
269   PetscFunctionBegin;
270   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
271   PetscFunctionReturn(0);
272 }
273 
274 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
275 {
276   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
277   PetscErrorCode ierr;
278 
279   PetscFunctionBegin;
280   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
281   PetscFunctionReturn(0);
282 }
283 
/*
   Begins assembly: verifies all ranks agree on the insert mode and starts the
   nonblocking scatter of stashed off-process values toward their owners.
*/
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  /* the bitwise-OR reduction yields ADD_VALUES|INSERT_VALUES exactly when ranks disagree */
  ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  /* launch communication of the stashed (off-process) entries */
  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
304 
/*
   Completes assembly: drains incoming stash messages (inserting the received
   values locally), assembles the local matrix, and on the first final assembly
   builds the scatter context used by the multiply routines.
*/
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break; /* no more incoming messages */

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      /* both branches compute j-i, since j==n in the else case */
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* first final assembly: build the vector scatter needed by MatMult() */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
341 
342 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
343 {
344   PetscErrorCode ierr;
345   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
346 
347   PetscFunctionBegin;
348   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
349   PetscFunctionReturn(0);
350 }
351 
/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
/*
   Zeros the (globally numbered) rows listed in rows[], placing diag on the
   diagonal of each zeroed row, and optionally fixes the right-hand side b so
   that b = diag*x on those rows.  Row indices owned by other ranks are routed
   to their owners with a hand-rolled point-to-point exchange; each rank then
   zeros only its local rows.
*/
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *sizes,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /*  first count number of contributors to each processor */
  /* sizes[2*j] = number of rows destined for rank j; sizes[2*j+1] = 1 iff a message to j is needed */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  /* each incoming message is at most nmax row indices long */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);

  /* pack the row indices by destination rank */
  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  /* the packing loop advanced starts[]; rebuild it for the sends */
  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  /* convert the received global row indices to local numbering */
  ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m   = ll->lda, i;

    /* poke diag directly into storage: entry (lrows[i], cmap->rstart+lrows[i]) */
    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
492 
493 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
494 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
495 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
496 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
497 
498 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
499 {
500   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
501   PetscErrorCode ierr;
502 
503   PetscFunctionBegin;
504   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
505   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
506   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
507   PetscFunctionReturn(0);
508 }
509 
510 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
511 {
512   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
513   PetscErrorCode ierr;
514 
515   PetscFunctionBegin;
516   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
517   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
518   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
519   PetscFunctionReturn(0);
520 }
521 
522 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
523 {
524   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
525   PetscErrorCode ierr;
526   PetscScalar    zero = 0.0;
527 
528   PetscFunctionBegin;
529   ierr = VecSet(yy,zero);CHKERRQ(ierr);
530   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
531   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
532   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
533   PetscFunctionReturn(0);
534 }
535 
536 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
537 {
538   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
539   PetscErrorCode ierr;
540 
541   PetscFunctionBegin;
542   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
543   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
544   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
545   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
546   PetscFunctionReturn(0);
547 }
548 
549 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
550 {
551   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
552   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
553   PetscErrorCode ierr;
554   PetscInt       len,i,n,m = A->rmap->n,radd;
555   PetscScalar    *x,zero = 0.0;
556 
557   PetscFunctionBegin;
558   ierr = VecSet(v,zero);CHKERRQ(ierr);
559   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
560   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
561   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
562   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
563   radd = A->rmap->rstart*m;
564   for (i=0; i<len; i++) {
565     x[i] = aloc->v[radd + i*m + i];
566   }
567   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
568   PetscFunctionReturn(0);
569 }
570 
/*
   Destroys the parallel dense matrix: frees the stash, the local sequential
   matrix, the scatter machinery, and removes every composed method so the
   object can no longer be dispatched to this implementation.
*/
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  /* detach every composed function so stale pointers cannot be invoked */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
609 
/*
   Writes the matrix to a binary viewer in the dense NATIVE format.  Rank 0
   writes the header and its own rows, then receives each other rank's local
   block in turn and writes it; other ranks just send their local array.
   Values are transposed from column-major local storage into row-major order
   before being written.
*/
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        /* NOTE(review): walks a->v as if it were contiguous, i.e. assumes the
           leading dimension equals the local row count -- confirm for
           user-supplied storage */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          /* receive rank k's local block, transpose it, and append to the file */
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        /* non-root ranks ship their whole local array to rank 0 */
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}
683 
684 extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
685 #include <petscdraw.h>
/*
   Views the matrix on ASCII, draw, or socket viewers.  Info formats print
   per-rank statistics and return early; otherwise the entire matrix is
   gathered onto rank 0 (via a temporary MPIDENSE matrix with all rows on
   rank 0) and viewed there as a sequential matrix.
*/
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* print per-rank storage statistics only; no matrix entries */
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    /* rank 0 owns every row of the gathered copy; all other ranks own none */
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      /* only rank 0 holds data; view its local (sequential) piece under the original name */
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
767 
768 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
769 {
770   PetscErrorCode ierr;
771   PetscBool      iascii,isbinary,isdraw,issocket;
772 
773   PetscFunctionBegin;
774   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
775   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
776   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
777   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
778 
779   if (iascii || issocket || isdraw) {
780     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
781   } else if (isbinary) {
782     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
783   }
784   PetscFunctionReturn(0);
785 }
786 
787 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
788 {
789   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
790   Mat            mdn  = mat->A;
791   PetscErrorCode ierr;
792   PetscReal      isend[5],irecv[5];
793 
794   PetscFunctionBegin;
795   info->block_size = 1.0;
796 
797   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
798 
799   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
800   isend[3] = info->memory;  isend[4] = info->mallocs;
801   if (flag == MAT_LOCAL) {
802     info->nz_used      = isend[0];
803     info->nz_allocated = isend[1];
804     info->nz_unneeded  = isend[2];
805     info->memory       = isend[3];
806     info->mallocs      = isend[4];
807   } else if (flag == MAT_GLOBAL_MAX) {
808     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
809 
810     info->nz_used      = irecv[0];
811     info->nz_allocated = irecv[1];
812     info->nz_unneeded  = irecv[2];
813     info->memory       = irecv[3];
814     info->mallocs      = irecv[4];
815   } else if (flag == MAT_GLOBAL_SUM) {
816     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
817 
818     info->nz_used      = irecv[0];
819     info->nz_allocated = irecv[1];
820     info->nz_unneeded  = irecv[2];
821     info->memory       = irecv[3];
822     info->mallocs      = irecv[4];
823   }
824   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
825   info->fill_ratio_needed = 0;
826   info->factor_mallocs    = 0;
827   PetscFunctionReturn(0);
828 }
829 
/*
   MatSetOption_MPIDense - Sets an option on a MATMPIDENSE matrix.

   Options affecting nonzero handling and orientation are forwarded to the
   local sequential dense matrix; options that have no meaning for a dense
   matrix are logged and ignored; anything else is an error.
*/
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* forwarded to the inner sequential matrix */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    /* recorded locally (affects MatSetValues interpretation) and forwarded */
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  /* meaningless for a dense matrix: log and ignore */
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* when set, off-process entries passed to MatSetValues are dropped
       instead of stashed for communication at assembly */
    a->donotstash = flg;
    break;
  /* structural hints with no effect on the dense format: log and ignore */
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}
869 
870 
/*
   MatDiagonalScale_MPIDense - Computes A = diag(ll)*A*diag(rr) in place.

   ll is distributed like the rows of A, so the left scaling is purely local.
   rr is distributed like the columns of A, so it is first gathered into the
   (sequential) work vector lvec via the matrix's scatter context; each process
   then scales all columns of its local strip.

   NOTE(review): the loops walk mat->v assuming the local array is stored
   column-major with column stride exactly m (i.e. lda == m, no padding) --
   confirm against the SeqDense layout used here.
*/
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
  const PetscScalar *l,*r;
  PetscScalar       x,*v;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    /* scale row i by l[i]: walk across the row with stride m */
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    /* gather the full right-scaling vector onto each process */
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    /* scale column i by r[i]: columns are contiguous runs of length m */
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
910 
911 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
912 {
913   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
914   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
915   PetscErrorCode ierr;
916   PetscInt       i,j;
917   PetscReal      sum = 0.0;
918   PetscScalar    *v  = mat->v;
919 
920   PetscFunctionBegin;
921   if (mdn->size == 1) {
922     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
923   } else {
924     if (type == NORM_FROBENIUS) {
925       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
926         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
927       }
928       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
929       *nrm = PetscSqrtReal(*nrm);
930       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
931     } else if (type == NORM_1) {
932       PetscReal *tmp,*tmp2;
933       ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
934       ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
935       ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
936       *nrm = 0.0;
937       v    = mat->v;
938       for (j=0; j<mdn->A->cmap->n; j++) {
939         for (i=0; i<mdn->A->rmap->n; i++) {
940           tmp[j] += PetscAbsScalar(*v);  v++;
941         }
942       }
943       ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
944       for (j=0; j<A->cmap->N; j++) {
945         if (tmp2[j] > *nrm) *nrm = tmp2[j];
946       }
947       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
948       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
949     } else if (type == NORM_INFINITY) { /* max row norm */
950       PetscReal ntemp;
951       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
952       ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
953     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
954   }
955   PetscFunctionReturn(0);
956 }
957 
958 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
959 {
960   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
961   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
962   Mat            B;
963   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
964   PetscErrorCode ierr;
965   PetscInt       j,i;
966   PetscScalar    *v;
967 
968   PetscFunctionBegin;
969   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
970     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
971     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
972     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
973     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
974   } else {
975     B = *matout;
976   }
977 
978   m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
979   ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
980   for (i=0; i<m; i++) rwork[i] = rstart + i;
981   for (j=0; j<n; j++) {
982     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
983     v   += m;
984   }
985   ierr = PetscFree(rwork);CHKERRQ(ierr);
986   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
987   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
988   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
989     *matout = B;
990   } else {
991     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
992   }
993   PetscFunctionReturn(0);
994 }
995 
996 
997 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
998 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
999 
1000 PetscErrorCode MatSetUp_MPIDense(Mat A)
1001 {
1002   PetscErrorCode ierr;
1003 
1004   PetscFunctionBegin;
1005   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
1006   PetscFunctionReturn(0);
1007 }
1008 
1009 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
1010 {
1011   PetscErrorCode ierr;
1012   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1013 
1014   PetscFunctionBegin;
1015   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1016   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1017   PetscFunctionReturn(0);
1018 }
1019 
1020 PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1021 {
1022   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1023   PetscErrorCode ierr;
1024 
1025   PetscFunctionBegin;
1026   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1027   PetscFunctionReturn(0);
1028 }
1029 
1030 PetscErrorCode MatRealPart_MPIDense(Mat A)
1031 {
1032   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1033   PetscErrorCode ierr;
1034 
1035   PetscFunctionBegin;
1036   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1037   PetscFunctionReturn(0);
1038 }
1039 
1040 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1041 {
1042   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1043   PetscErrorCode ierr;
1044 
1045   PetscFunctionBegin;
1046   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1047   PetscFunctionReturn(0);
1048 }
1049 
1050 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
1051 {
1052   PetscErrorCode    ierr;
1053   PetscScalar       *x;
1054   const PetscScalar *a;
1055   PetscInt          lda;
1056 
1057   PetscFunctionBegin;
1058   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1059   ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr);
1060   ierr = MatDenseGetArrayRead(A,&a);CHKERRQ(ierr);
1061   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1062   ierr = PetscMemcpy(x,a+col*lda,A->rmap->n*sizeof(PetscScalar));CHKERRQ(ierr);
1063   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1064   ierr = MatDenseGetArrayRead(A,&a);CHKERRQ(ierr);
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1069 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1070 {
1071   PetscErrorCode ierr;
1072   PetscInt       i,n;
1073   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1074   PetscReal      *work;
1075 
1076   PetscFunctionBegin;
1077   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1078   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1079   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1080   if (type == NORM_2) {
1081     for (i=0; i<n; i++) work[i] *= work[i];
1082   }
1083   if (type == NORM_INFINITY) {
1084     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1085   } else {
1086     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1087   }
1088   ierr = PetscFree(work);CHKERRQ(ierr);
1089   if (type == NORM_2) {
1090     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1091   }
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1096 {
1097   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1098   PetscErrorCode ierr;
1099   PetscScalar    *a;
1100   PetscInt       m,n,i;
1101 
1102   PetscFunctionBegin;
1103   ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
1104   ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
1105   for (i=0; i<m*n; i++) {
1106     ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
1107   }
1108   ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
1109   PetscFunctionReturn(0);
1110 }
1111 
1112 extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);
1113 
/*
   MatMissingDiagonal_MPIDense - A dense matrix stores every entry explicitly,
   so no diagonal entry can be missing; always reports PETSC_FALSE.
   The index output d is left untouched (it is only meaningful when an entry
   is missing).
*/
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1120 
1121 static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat,Mat,MatReuse,PetscReal,Mat*);
1122 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat*);
1123 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1124 
1125 /* -------------------------------------------------------------------*/
/* Operation table for MATMPIDENSE.  The bracketed index comments refer to
   slot numbers in struct _MatOps (see petsc/private/matimpl.h); a 0 entry
   means the operation is not implemented for this matrix type.  This table
   is copied into mat->ops in MatCreate_MPIDense(). */
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
#if defined(PETSC_HAVE_ELEMENTAL)
                                /* 89*/ MatMatMult_MPIDense_MPIDense,
                                        MatMatMultSymbolic_MPIDense_MPIDense,
#else
                                /* 89*/ 0,
                                        0,
#endif
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        MatMatTransposeMult_MPIDense_MPIDense,
                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        0,
                                /* 99*/ 0,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        MatTransposeMatMult_MPIDense_MPIDense,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIDense
};
1277 
1278 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1279 {
1280   Mat_MPIDense   *a;
1281   PetscErrorCode ierr;
1282 
1283   PetscFunctionBegin;
1284   if (mat->preallocated) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix has already been preallocated");
1285   mat->preallocated = PETSC_TRUE;
1286   /* Note:  For now, when data is specified above, this assumes the user correctly
1287    allocates the local dense storage space.  We should add error checking. */
1288 
1289   a       = (Mat_MPIDense*)mat->data;
1290   ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1291   ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1292   a->nvec = mat->cmap->n;
1293 
1294   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1295   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1296   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1297   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1298   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 #if defined(PETSC_HAVE_ELEMENTAL)
/*
   MatConvert_MPIDense_Elemental - Converts a MATMPIDENSE matrix to MATELEMENTAL.

   Each process inserts its entire local strip (m local rows x N global
   columns) into the Elemental matrix, which redistributes the entries during
   assembly.  For MAT_REUSE_MATRIX the target is zeroed first (entries are
   accumulated with ADD_VALUES); MAT_INPLACE_MATRIX replaces A's header with
   the converted matrix.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    /* zero so the ADD_VALUES insertion below reproduces A exactly */
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    /* the local dense array is column-major */
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  /* index sets: all global columns, and this process's global rows */
  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
1341 #endif
1342 
1343 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1344 {
1345   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1346   PetscErrorCode ierr;
1347 
1348   PetscFunctionBegin;
1349   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1350   PetscFunctionReturn(0);
1351 }
1352 
1353 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1354 {
1355   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1356   PetscErrorCode ierr;
1357 
1358   PetscFunctionBegin;
1359   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1360   PetscFunctionReturn(0);
1361 }
1362 
1363 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1364 {
1365   PetscErrorCode ierr;
1366   Mat_MPIDense   *mat;
1367   PetscInt       m,nloc,N;
1368 
1369   PetscFunctionBegin;
1370   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
1371   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
1372   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1373     PetscInt sum;
1374 
1375     if (n == PETSC_DECIDE) {
1376       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1377     }
1378     /* Check sum(n) = N */
1379     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1380     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1381 
1382     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
1383   }
1384 
1385   /* numeric phase */
1386   mat = (Mat_MPIDense*)(*outmat)->data;
1387   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1388   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1389   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1390   PetscFunctionReturn(0);
1391 }
1392 
/*
   MatCreate_MPIDense - Type constructor for MATMPIDENSE.

   Installs the operation table, initializes the per-type data structure
   (rank/size, stash, scatter placeholders) and registers the composed
   methods that implement the MatDense*, preallocation, conversion and
   matrix-product interfaces for this type.  The local sequential matrix
   itself is created later by MatMPIDenseSetPreallocation_MPIDense().
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply; created lazily at assembly */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  /* composed methods queried by the type-independent front-end functions */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1440 
1441 /*MC
1442    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1443 
1444    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1445    and MATMPIDENSE otherwise.
1446 
1447    Options Database Keys:
1448 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1449 
1450   Level: beginner
1451 
1452 
1453 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1454 M*/
1455 
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Collective on Mat

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1486 
1487 /*@
1488    MatDensePlaceArray - Allows one to replace the array in a dense array with an
1489    array provided by the user. This is useful to avoid copying an array
1490    into a matrix
1491 
1492    Not Collective
1493 
1494    Input Parameters:
1495 +  mat - the matrix
1496 -  array - the array in column major order
1497 
1498    Notes:
1499    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1500    freed when the matrix is destroyed.
1501 
1502    Level: developer
1503 
1504 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1505 
1506 @*/
PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar array[])
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  /* PetscUseMethod() errors if the matrix type does not compose
     "MatDensePlaceArray_C", i.e. if mat is not a dense matrix */
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  /* the numerical values of the matrix changed: bump the object state so that
     anything caching results derived from mat knows to recompute */
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1515 
1516 /*@
   MatDenseResetArray - Resets the matrix array to the array that it previously had before the call to MatDensePlaceArray()
1518 
1519    Not Collective
1520 
1521    Input Parameters:
1522 .  mat - the matrix
1523 
1524    Notes:
1525    You can only call this after a call to MatDensePlaceArray()
1526 
1527    Level: developer
1528 
1529 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1530 
1531 @*/
PetscErrorCode  MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  /* PetscUseMethod() errors if the matrix type does not compose
     "MatDenseResetArray_C"; valid only after MatDensePlaceArray() */
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  /* restoring the original array changes the visible values: bump object state */
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1540 
1541 /*@C
1542    MatCreateDense - Creates a parallel matrix in dense format.
1543 
1544    Collective
1545 
1546    Input Parameters:
1547 +  comm - MPI communicator
1548 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1549 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1550 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1551 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1552 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1553    to control all matrix memory allocation.
1554 
1555    Output Parameter:
1556 .  A - the matrix
1557 
1558    Notes:
1559    The dense format is fully compatible with standard Fortran 77
1560    storage by columns.
1561 
1562    The data input variable is intended primarily for Fortran programmers
1563    who wish to allocate their own matrix memory space.  Most users should
1564    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1565 
1566    The user MUST specify either the local or global matrix dimensions
1567    (possibly both).
1568 
1569    Level: intermediate
1570 
1571 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1572 @*/
1573 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1574 {
1575   PetscErrorCode ierr;
1576   PetscMPIInt    size;
1577 
1578   PetscFunctionBegin;
1579   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1580   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1581   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1582   if (size > 1) {
1583     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1584     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1585     if (data) {  /* user provided data array, so no need to assemble */
1586       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
1587       (*A)->assembled = PETSC_TRUE;
1588     }
1589   } else {
1590     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1591     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1592   }
1593   PetscFunctionReturn(0);
1594 }
1595 
/* Duplicate an MPIDENSE matrix: clone the parallel layout and bookkeeping,
   then duplicate the local sequential block (copying values iff cpvalues says so). */
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  /* create an empty matrix of the same type and sizes on A's communicator */
  ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIDense*)mat->data;

  /* the duplicate is immediately usable: no assembly pending */
  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;

  /* share (reference-count) A's row/column layouts rather than rebuilding them */
  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  /* set up the mat-vec scatter, then duplicate the local dense block
     (a->A); NOTE(review): this ordering mirrors assembly and appears
     intentional -- keep MatSetUpMultiply before the inner duplicate */
  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}
1629 
/* Load a matrix stored on disk in dense (row-major, whole-matrix) binary format.
   Rank 0 reads the file and ships each process its row block; every rank then
   transposes its row-major slab into the column-major local dense array. */
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  const PetscInt *rowners;
  PetscInt       i,m,n,nz,j,mMax;
  PetscScalar    *array,*vals,*vals_ptr;
  Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of rows and columns */
  m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
  n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;

  ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
  if (!a->A) {
    /* local storage not yet allocated (e.g. matrix came straight from MatCreate) */
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }
  ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
  ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
  /* rank 0 reuses one staging buffer for every destination rank, so it must be
     large enough for the biggest row block on any process */
  ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    ierr = PetscBinaryRead(fd,vals,m*N,NULL,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix-by row (this is why cannot directly read into array:
       the file is row major while the local dense array is column major) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
      /* MPIULong_Send handles counts that may exceed the int range of plain MPI_Send */
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);

    /* insert into matrix-by row (row-major file data -> column-major local array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1696 
/* Load an MPIDENSE matrix from a PETSc binary viewer.  The file may contain
   either a dense dump (delegated to MatLoad_MPIDense_DenseInFile) or a sparse
   AIJ-style dump (header, row lengths, column indices, values), in which case
   rank 0 reads everything and ships each rank its portion, and the entries are
   inserted with MatSetValues. */
static PetscErrorCode MatLoad_MPIDense_Binary(Mat newmat,PetscViewer viewer)
{
  Mat_MPIDense   *a;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  /* only rank 0 touches the file; the header is broadcast to everyone */
  if (!rank) {
    ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1]; N = header[2]; nz = header[3];

  /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
  if (newmat->rmap->N < 0) newmat->rmap->N = M;
  if (newmat->cmap->N < 0) newmat->cmap->N = N;

  if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
  if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows (default: near-equal split with the
     remainder going to the lowest ranks) */
  if (newmat->rmap->n < 0) {
    ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
  } else {
    ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
  }
  if (newmat->cmap->n < 0) {
    n = PETSC_DECIDE;
  } else {
    ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
  }

  /* build the global row-ownership prefix sum: rowners[r]..rowners[r+1] are rank r's rows */
  ierr       = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    /* sanity check: the received count must match what the row lengths predict */
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
  a = (Mat_MPIDense*)newmat->data;
  if (!a->A) {
    /* local storage not yet allocated */
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  if (!rank) {
    ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix: one MatSetValues call per row, walking the
       (column-index, value) streams in lockstep */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1875 
1876 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
1877 {
1878   PetscBool      isbinary, ishdf5;
1879   PetscErrorCode ierr;
1880 
1881   PetscFunctionBegin;
1882   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
1883   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
1884   /* force binary viewer to load .info file if it has not yet done so */
1885   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1886   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1887   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
1888   if (isbinary) {
1889     ierr = MatLoad_MPIDense_Binary(newMat,viewer);CHKERRQ(ierr);
1890   } else if (ishdf5) {
1891 #if defined(PETSC_HAVE_HDF5)
1892     ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
1893 #else
1894     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
1895 #endif
1896   } else {
1897     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
1898   }
1899   PetscFunctionReturn(0);
1900 }
1901 
1902 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1903 {
1904   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1905   Mat            a,b;
1906   PetscBool      flg;
1907   PetscErrorCode ierr;
1908 
1909   PetscFunctionBegin;
1910   a    = matA->A;
1911   b    = matB->A;
1912   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1913   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1914   PetscFunctionReturn(0);
1915 }
1916 
1917 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1918 {
1919   PetscErrorCode        ierr;
1920   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1921   Mat_TransMatMultDense *atb = a->atbdense;
1922 
1923   PetscFunctionBegin;
1924   ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
1925   ierr = (atb->destroy)(A);CHKERRQ(ierr);
1926   ierr = PetscFree(atb);CHKERRQ(ierr);
1927   PetscFunctionReturn(0);
1928 }
1929 
1930 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
1931 {
1932   PetscErrorCode        ierr;
1933   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1934   Mat_MatTransMultDense *abt = a->abtdense;
1935 
1936   PetscFunctionBegin;
1937   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
1938   ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
1939   ierr = (abt->destroy)(A);CHKERRQ(ierr);
1940   ierr = PetscFree(abt);CHKERRQ(ierr);
1941   PetscFunctionReturn(0);
1942 }
1943 
1944 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1945 {
1946   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1947   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1948   Mat_TransMatMultDense *atb = c->atbdense;
1949   PetscErrorCode ierr;
1950   MPI_Comm       comm;
1951   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1952   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1953   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1954   PetscScalar    _DOne=1.0,_DZero=0.0;
1955   PetscBLASInt   am,an,bn,aN;
1956   const PetscInt *ranges;
1957 
1958   PetscFunctionBegin;
1959   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1960   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1961   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1962 
1963   /* compute atbarray = aseq^T * bseq */
1964   ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
1965   ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
1966   ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
1967   ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
1968   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1969 
1970   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
1971   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1972 
1973   /* arrange atbarray into sendbuf */
1974   k = 0;
1975   for (proc=0; proc<size; proc++) {
1976     for (j=0; j<cN; j++) {
1977       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1978     }
1979   }
1980   /* sum all atbarray to local values of C */
1981   ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
1982   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
1983   ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
1984   PetscFunctionReturn(0);
1985 }
1986 
/* Symbolic phase of C = A^T * B for MPIDENSE operands: creates the product
   matrix, allocates the work buffers reused by every numeric phase, and
   installs a destroy hook that frees them.  `fill` is unused for dense
   products but kept for interface uniformity. */
PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode        ierr;
  Mat                   Cdense;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_MPIDense          *c;
  Mat_TransMatMultDense *atb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* A and B must own identical row ranges so the local A^T*B product is defined */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product Cdense (cm x bn locally, global sizes derived) */
  ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
  ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C   = Cdense;

  /* create data structure for reuse Cdense: full cM x cN staging arrays plus
     per-rank receive counts for the Reduce_scatter in the numeric phase */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM = Cdense->rmap->N;
  ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);

  /* chain the destroy routine so the buffers are released with the product */
  c                    = (Mat_MPIDense*)Cdense->data;
  c->atbdense          = atb;
  atb->destroy         = Cdense->ops->destroy;
  Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2024 
/* Driver for C = A^T * B: run the symbolic phase once (MAT_INITIAL_MATRIX),
   then always run the numeric phase to fill *C with current values. */
PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2036 
/* Symbolic phase of C = A * B^T for MPIDENSE operands: creates the product,
   selects the communication algorithm (allgatherv or cyclic, runtime option
   -matmattransmult_mpidense_mpidense_via), allocates the matching buffers,
   and installs a destroy hook.  `fill` is unused for dense products. */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat *C)
{
  PetscErrorCode        ierr;
  Mat                   Cdense;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  Mat_MPIDense          *c;
  PetscMPIInt           tag;
  const char            *algTypes[2] = {"allgatherv","cyclic"};
  PetscInt              alg, nalg = 2;
  Mat_MatTransMultDense *abt;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  alg = 0; /* default is allgatherv */
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[0],&alg,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  /* A*B^T requires matching global column counts */
  if (A->cmap->N != B->cmap->N) {
    SETERRQ2(comm,PETSC_ERR_ARG_SIZ,"Matrix global column dimensions are incompatible, A (%D) != B (%D)",A->cmap->N,B->cmap->N);
  }

  /* create matrix product Cdense */
  ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
  ierr = MatSetSizes(Cdense,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
  /* a dedicated tag keeps the cyclic algorithm's messages separate */
  ierr = PetscObjectGetNewTag((PetscObject)Cdense, &tag);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C   = Cdense;

  /* create data structure for reuse Cdense */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* cyclic: two ping-pong buffers sized for the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* allgatherv: send buffer for local B^T, receive buffer for all of B^T */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  /* chain the destroy routine so the buffers are released with the product */
  c                    = (Mat_MPIDense*)Cdense->data;
  c->abtdense          = abt;
  abt->destroy         = Cdense->ops->destroy;
  Cdense->ops->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2095 
/* Numeric phase of C = A * B^T using a ring (cyclic) exchange: each step
   multiplies the local A block against the row block of B currently held in
   sendbuf while the next block is in flight, overlapping computation with
   communication.  Statement order here (post sends, compute, wait, swap) is
   essential -- do not reorder. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_SeqDense   *cseq=(Mat_SeqDense*)(c->A)->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscMPIInt    rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar    *sendbuf, *recvbuf=0, *carray;
  PetscInt       i,cK=A->cmap->N,k,j,bn;
  PetscScalar    _DOne=1.0,_DZero=0.0;
  PetscBLASInt   cm, cn, ck;
  MPI_Request    reqs[2];
  const PetscInt *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn = B->rmap->n;
  /* if the local B block is contiguous (lda == bn) it can be sent in place;
     otherwise compact it into buf[0] first */
  if (bseq->lda == bn) {
    sendbuf = bseq->v;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bseq->v[i * bseq->lda + j];
      }
    }
  }
  /* ring neighbors: send "down", receive "up" */
  if (size > 1) {
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
      ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
    }

    /* local aseq * sendbuf^T, written into the columns of C owned by recvisfrom */
    ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    carray = &cseq->v[cseq->lda * ranges[recvisfrom]];
    PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,sendbuf,&cn,&_DZero,carray,&cseq->lda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  PetscFunctionReturn(0);
}
2167 
2168 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2169 {
2170   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2171   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
2172   Mat_SeqDense   *cseq=(Mat_SeqDense*)(c->A)->data;
2173   Mat_MatTransMultDense *abt = c->abtdense;
2174   PetscErrorCode ierr;
2175   MPI_Comm       comm;
2176   PetscMPIInt    rank,size;
2177   PetscScalar    *sendbuf, *recvbuf;
2178   PetscInt       i,cK=A->cmap->N,k,j,bn;
2179   PetscScalar    _DOne=1.0,_DZero=0.0;
2180   PetscBLASInt   cm, cn, ck;
2181 
2182   PetscFunctionBegin;
2183   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2184   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2185   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2186 
2187   /* copy transpose of B into buf[0] */
2188   bn      = B->rmap->n;
2189   sendbuf = abt->buf[0];
2190   recvbuf = abt->buf[1];
2191   for (k = 0, j = 0; j < bn; j++) {
2192     for (i = 0; i < cK; i++, k++) {
2193       sendbuf[k] = bseq->v[i * bseq->lda + j];
2194     }
2195   }
2196   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
2197   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2198   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2199   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
2200   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,recvbuf,&ck,&_DZero,cseq->v,&cseq->lda));
2201   PetscFunctionReturn(0);
2202 }
2203 
2204 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2205 {
2206   Mat_MPIDense   *c=(Mat_MPIDense*)C->data;
2207   Mat_MatTransMultDense *abt = c->abtdense;
2208   PetscErrorCode ierr;
2209 
2210   PetscFunctionBegin;
2211   switch (abt->alg) {
2212   case 1:
2213     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2214     break;
2215   default:
2216     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2217     break;
2218   }
2219   PetscFunctionReturn(0);
2220 }
2221 
/* Driver for C = A * B^T: run the symbolic phase once (MAT_INITIAL_MATRIX),
   then always run the numeric phase to fill *C with current values. */
static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat A,Mat B, MatReuse scall, PetscReal fill, Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatMatTransposeMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2233 
2234 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
2235 {
2236   PetscErrorCode   ierr;
2237   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
2238   Mat_MatMultDense *ab = a->abdense;
2239 
2240   PetscFunctionBegin;
2241   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2242   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2243   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2244 
2245   ierr = (ab->destroy)(A);CHKERRQ(ierr);
2246   ierr = PetscFree(ab);CHKERRQ(ierr);
2247   PetscFunctionReturn(0);
2248 }
2249 
2250 #if defined(PETSC_HAVE_ELEMENTAL)
2251 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2252 {
2253   PetscErrorCode   ierr;
2254   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
2255   Mat_MatMultDense *ab=c->abdense;
2256 
2257   PetscFunctionBegin;
2258   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
2259   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
2260   ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
2261   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
2262   PetscFunctionReturn(0);
2263 }
2264 
2265 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
2266 {
2267   PetscErrorCode   ierr;
2268   Mat              Ae,Be,Ce;
2269   Mat_MPIDense     *c;
2270   Mat_MatMultDense *ab;
2271 
2272   PetscFunctionBegin;
2273   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2274     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2275   }
2276 
2277   /* convert A and B to Elemental matrices Ae and Be */
2278   ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr);
2279   ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr);
2280 
2281   /* Ce = Ae*Be */
2282   ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
2283   ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);
2284 
2285   /* convert Ce to C */
2286   ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
2287 
2288   /* create data structure for reuse Cdense */
2289   ierr = PetscNew(&ab);CHKERRQ(ierr);
2290   c                  = (Mat_MPIDense*)(*C)->data;
2291   c->abdense         = ab;
2292 
2293   ab->Ae             = Ae;
2294   ab->Be             = Be;
2295   ab->Ce             = Ce;
2296   ab->destroy        = (*C)->ops->destroy;
2297   (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
2298   (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2299   PetscFunctionReturn(0);
2300 }
2301 
2302 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
2303 {
2304   PetscErrorCode ierr;
2305 
2306   PetscFunctionBegin;
2307   if (scall == MAT_INITIAL_MATRIX) { /* simbolic product includes numeric product */
2308     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
2309     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
2310     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
2311   } else {
2312     ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
2313     ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
2314     ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
2315   }
2316   PetscFunctionReturn(0);
2317 }
2318 #endif
2319 
2320