xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 702c4f2fb2b40d2bce4bfb3c8519157ae61fced8)

/*
   Basic functions for parallel dense matrices.
*/


#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
              matrix that represents the local portion of the operator. For a sequential matrix
              it returns the matrix itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}
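
/*
   Usage sketch (illustrative only, not part of this file): a hypothetical
   caller that needs direct access to the local dense block of a MATMPIDENSE
   matrix A. The returned Mat is a reference to the inner matrix, not a copy,
   so the caller must not destroy it.

      Mat            Aloc;
      PetscErrorCode ierr;

      ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
      ierr = MatScale(Aloc,2.0);CHKERRQ(ierr);    <-- scales the locally owned rows of A
*/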

PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
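
/*
   Usage note (illustrative): the diagonal block is built on demand above and
   cached on A with PetscObjectCompose() under the key "DiagonalBlock", so the
   Mat obtained by a caller is borrowed and must not be destroyed:

      Mat D;
      ierr = MatGetDiagonalBlock(A,&D);CHKERRQ(ierr);
      ... use D as a sequential matrix holding this process's diagonal block ...
*/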

PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
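
/*
   Assembly sketch (hypothetical caller): entries aimed at rows owned by
   another process are cached in mat->stash above and only communicated during
   assembly, so a MatSetValues() sequence must always be closed with the
   assembly pair before the matrix is used:

      ierr = MatSetValues(mat,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   where grow/gcol are global indices that may lie outside this process's
   ownership range.
*/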

PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;
  MPI_Comm       comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrices with the same
     row partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
  ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *sizes,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /* first count number of contributors to each processor */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m   = ll->lda, i;

    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
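
/*
   Usage sketch (hypothetical caller): zero two global rows, place 1.0 on
   their diagonal entries, and adjust the right-hand side b so the solution
   keeps the values prescribed in x for those rows:

      PetscInt rows[2] = {0,5};
      ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);

   Each process may pass any global rows; they are routed to their owners by
   the send/receive code above.
*/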

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

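/*
   Commentary on the parallel products below (added note): the local block of
   a MATMPIDENSE matrix stores all N columns of the locally owned rows, so
   y = A*x is implemented by scattering the entire distributed vector x into
   the local work vector mdn->lvec (length N on every process) followed by a
   single local dense multiply. The transpose products run the same scatter
   in reverse with ADD_VALUES to accumulate each process's partial result.
*/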
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_nest_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);

#include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto the first processor */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}


PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
  const PetscScalar *l,*r;
  PetscScalar       x,*v;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v  = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode  MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
{
  PetscErrorCode    ierr;
  PetscScalar       *x;
  const PetscScalar *a;
  PetscInt          lda;

  PetscFunctionBegin;
  if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(A,&a);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = PetscArraycpy(x,a+col*lda,A->rmap->n);CHKERRQ(ierr);
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);

PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;
  PetscScalar    *a;
  PetscInt       m,n,i;

  PetscFunctionBegin;
  ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
  ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
  for (i=0; i<m*n; i++) {
    ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);

static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat,Mat,MatReuse,PetscReal,Mat*);
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat*);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
#if defined(PETSC_HAVE_ELEMENTAL)
                                /* 89*/ MatMatMult_MPIDense_MPIDense,
                                        MatMatMultSymbolic_MPIDense_MPIDense,
#else
                                /* 89*/ 0,
                                        0,
#endif
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        MatMatTransposeMult_MPIDense_MPIDense,
                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        0,
                                /* 99*/ 0,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        MatTransposeMatMult_MPIDense_MPIDense,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIDense
};

PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (mat->preallocated) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix has already been preallocated");
  mat->preallocated = PETSC_TRUE;
  /* Note: for now, when data is specified, this assumes the user has correctly
     allocated the local dense storage space.  We should add error checking. */

  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
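
/*
   Creation sketch (hypothetical caller): the routine above is normally
   reached through MatCreateDense(), or explicitly via the type interface:

      Mat            A;
      PetscErrorCode ierr;

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);

   Passing NULL lets PETSc allocate the local column-major block of size
   mat->rmap->n by mat->cmap->N; passing a user array instead hands PETSc
   storage whose size, as the note above says, is not checked.
*/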
1304 
1305 #if defined(PETSC_HAVE_ELEMENTAL)
1306 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1307 {
1308   Mat            mat_elemental;
1309   PetscErrorCode ierr;
1310   PetscScalar    *v;
1311   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1312 
1313   PetscFunctionBegin;
1314   if (reuse == MAT_REUSE_MATRIX) {
1315     mat_elemental = *newmat;
1316     ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
1317   } else {
1318     ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
1319     ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1320     ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
1321     ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
1322     ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1323   }
1324 
1325   ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
1326   for (i=0; i<N; i++) cols[i] = i;
1327   for (i=0; i<m; i++) rows[i] = rstart + i;
1328 
1329   /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1330   ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
1331   ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
1332   ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1333   ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1334   ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
1335   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1336 
1337   if (reuse == MAT_INPLACE_MATRIX) {
1338     ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
1339   } else {
1340     *newmat = mat_elemental;
1341   }
1342   PetscFunctionReturn(0);
1343 }
1344 #endif
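
/*
   Example (an illustrative sketch, not part of the original source): converting an
   MPIDENSE matrix with MatConvert(), which dispatches to MatConvert_MPIDense_Elemental()
   above when PETSc is configured with Elemental.  The variable names are hypothetical.

     Mat A,Ae;
     ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX,&Ae);CHKERRQ(ierr);   first call creates Ae
     ierr = MatConvert(A,MATELEMENTAL,MAT_REUSE_MATRIX,&Ae);CHKERRQ(ierr);     later calls refill it
     ierr = MatDestroy(&Ae);CHKERRQ(ierr);
*/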
1345 
1346 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1347 {
1348   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1349   PetscErrorCode ierr;
1350 
1351   PetscFunctionBegin;
1352   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1353   PetscFunctionReturn(0);
1354 }
1355 
1356 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1357 {
1358   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1359   PetscErrorCode ierr;
1360 
1361   PetscFunctionBegin;
1362   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1367 {
1368   PetscErrorCode ierr;
1369   Mat_MPIDense   *mat;
1370   PetscInt       m,nloc,N;
1371 
1372   PetscFunctionBegin;
1373   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
1374   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
1375   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1376     PetscInt sum;
1377 
1378     if (n == PETSC_DECIDE) {
1379       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1380     }
1381     /* Check sum(n) = N */
1382     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1383     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1384 
1385     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
1386   }
1387 
1388   /* numeric phase */
1389   mat = (Mat_MPIDense*)(*outmat)->data;
1390   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1391   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1392   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1393   PetscFunctionReturn(0);
1394 }
1395 
1396 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1397 {
1398   Mat_MPIDense   *a;
1399   PetscErrorCode ierr;
1400 
1401   PetscFunctionBegin;
1402   ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
1403   mat->data = (void*)a;
1404   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1405 
1406   mat->insertmode = NOT_SET_VALUES;
1407   ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
1408   ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);
1409 
1410   /* build cache for off-process entries formed */
1411   a->donotstash = PETSC_FALSE;
1412 
1413   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1414 
1415   /* stuff used for matrix vector multiply */
1416   a->lvec        = 0;
1417   a->Mvctx       = 0;
1418   a->roworiented = PETSC_TRUE;
1419 
1420   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
1421   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1422   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1423   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
1424   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
1425   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
1426   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
1427 #if defined(PETSC_HAVE_ELEMENTAL)
1428   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
1429 #endif
1430   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1431   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1432   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1433   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1434   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_nest_mpidense_C",MatMatMult_Nest_Dense);CHKERRQ(ierr);
1435   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",MatMatMultSymbolic_Nest_Dense);CHKERRQ(ierr);
1436   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",MatMatMultNumeric_Nest_Dense);CHKERRQ(ierr);
1437 
1438   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1439   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1440   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1441   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
1442   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
1443   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1444   PetscFunctionReturn(0);
1445 }
1446 
1447 /*MC
1448    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1449 
1450    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1451    and MATMPIDENSE otherwise.
1452 
1453    Options Database Keys:
1454 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1455 
1456   Level: beginner
1457 
1458 
1459 .seealso: MATSEQDENSE,MATMPIDENSE
1460 M*/
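
/*
   Example (an illustrative sketch, not from this file): selecting the dense type from
   the options database.  With -mat_type dense, MatSetFromOptions() picks MATSEQDENSE
   on one process and MATMPIDENSE on several; M and N are hypothetical global sizes.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSetUp(A);CHKERRQ(ierr);
*/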
1461 
1462 /*@C
1463    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1464 
1465    Not Collective
1466 
1467    Input Parameters:
1468 +  B - the matrix
1469 -  data - optional location of matrix data.  Set data=NULL for PETSc
1470    to control all matrix memory allocation.
1471 
1472    Notes:
1473    The dense format is fully compatible with standard Fortran 77
1474    storage by columns.
1475 
1476    The data input variable is intended primarily for Fortran programmers
1477    who wish to allocate their own matrix memory space.  Most users should
1478    set data=NULL.
1479 
1480    Level: intermediate
1481 
1482 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1483 @*/
1484 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1485 {
1486   PetscErrorCode ierr;
1487 
1488   PetscFunctionBegin;
1489   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
1490   PetscFunctionReturn(0);
1491 }
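
/*
   Example (an illustrative sketch, not from this file): typical preallocation of an
   MPIDENSE matrix.  Passing data=NULL lets PETSc allocate the local column-major
   storage; a user-supplied array must hold at least (local rows) x (global columns)
   scalars, as set up in MatMPIDenseSetPreallocation_MPIDense() above.  The names
   comm, m, n, M, N are hypothetical.

     Mat B;
     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
*/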
1492 
1493 /*@
1494    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
1495    array provided by the user. This is useful to avoid copying an array
1496    into a matrix.
1497 
1498    Not Collective
1499 
1500    Input Parameters:
1501 +  mat - the matrix
1502 -  array - the array in column major order
1503 
1504    Notes:
1505    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1506    freed when the matrix is destroyed.
1507 
1508    Level: developer
1509 
1510 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1511 
1512 @*/
1513 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar array[])
1514 {
1515   PetscErrorCode ierr;
1516   PetscFunctionBegin;
1517   ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
1518   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1519   PetscFunctionReturn(0);
1520 }
1521 
1522 /*@
1523    MatDenseResetArray - Resets the matrix array to that which it previously had before the call to MatDensePlaceArray()
1524 
1525    Not Collective
1526 
1527    Input Parameters:
1528 .  mat - the matrix
1529 
1530    Notes:
1531    You can only call this after a call to MatDensePlaceArray()
1532 
1533    Level: developer
1534 
1535 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1536 
1537 @*/
1538 PetscErrorCode  MatDenseResetArray(Mat mat)
1539 {
1540   PetscErrorCode ierr;
1541   PetscFunctionBegin;
1542   ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
1543   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1544   PetscFunctionReturn(0);
1545 }
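
/*
   Example (an illustrative sketch, not from this file): temporarily providing user
   storage without a copy via the place/reset pair above.  The placed array must be
   in column-major order and sized for the local block; PETSc will not free it.
   The names A, m, N, work are hypothetical.

     PetscScalar *work;
     ierr = PetscMalloc1(m*N,&work);CHKERRQ(ierr);       local rows times global columns
     ierr = MatDensePlaceArray(A,work);CHKERRQ(ierr);    A now operates on work
     ierr = MatDenseResetArray(A);CHKERRQ(ierr);         restore the original array
     ierr = PetscFree(work);CHKERRQ(ierr);
*/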
1546 
1547 /*@C
1548    MatCreateDense - Creates a parallel matrix in dense format.
1549 
1550    Collective
1551 
1552    Input Parameters:
1553 +  comm - MPI communicator
1554 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1555 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1556 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1557 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1558 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1559    to control all matrix memory allocation.
1560 
1561    Output Parameter:
1562 .  A - the matrix
1563 
1564    Notes:
1565    The dense format is fully compatible with standard Fortran 77
1566    storage by columns.
1567 
1568    The data input variable is intended primarily for Fortran programmers
1569    who wish to allocate their own matrix memory space.  Most users should
1570    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1571 
1572    The user MUST specify either the local or global matrix dimensions
1573    (possibly both).
1574 
1575    Level: intermediate
1576 
1577 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1578 @*/
1579 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1580 {
1581   PetscErrorCode ierr;
1582   PetscMPIInt    size;
1583 
1584   PetscFunctionBegin;
1585   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1586   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1587   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1588   if (size > 1) {
1589     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1590     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1591     if (data) {  /* user provided data array, so no need to assemble */
1592       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
1593       (*A)->assembled = PETSC_TRUE;
1594     }
1595   } else {
1596     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1597     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1598   }
1599   PetscFunctionReturn(0);
1600 }
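
/*
   Example (an illustrative sketch, not from this file): creating a distributed dense
   matrix and filling its locally owned rows, so no stash communication is needed.
   M and N are hypothetical global sizes.

     Mat         A;
     PetscInt    i,j,rstart,rend;
     PetscScalar v;
     ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       for (j=0; j<N; j++) {
         v    = 1.0/(i+j+1);
         ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
       }
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/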
1601 
1602 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1603 {
1604   Mat            mat;
1605   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1606   PetscErrorCode ierr;
1607 
1608   PetscFunctionBegin;
1609   *newmat = 0;
1610   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1611   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1612   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1613   a       = (Mat_MPIDense*)mat->data;
1614 
1615   mat->factortype   = A->factortype;
1616   mat->assembled    = PETSC_TRUE;
1617   mat->preallocated = PETSC_TRUE;
1618 
1619   a->size         = oldmat->size;
1620   a->rank         = oldmat->rank;
1621   mat->insertmode = NOT_SET_VALUES;
1622   a->nvec         = oldmat->nvec;
1623   a->donotstash   = oldmat->donotstash;
1624 
1625   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1626   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1627 
1628   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1629   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1630   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1631 
1632   *newmat = mat;
1633   PetscFunctionReturn(0);
1634 }
1635 
1636 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
1637 {
1638   PetscErrorCode ierr;
1639   PetscMPIInt    rank,size;
1640   const PetscInt *rowners;
1641   PetscInt       i,m,n,nz,j,mMax;
1642   PetscScalar    *array,*vals,*vals_ptr;
1643   Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;
1644 
1645   PetscFunctionBegin;
1646   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1647   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1648 
1649   /* determine ownership of rows and columns */
1650   m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
1651   n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;
1652 
1653   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1654   if (!a->A) {
1655     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1656   }
1657   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
1658   ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
1659   ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
1660   ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
1661   if (!rank) {
1662     ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);
1663 
1664     /* read in my part of the matrix numerical values  */
1665     ierr = PetscBinaryRead(fd,vals,m*N,NULL,PETSC_SCALAR);CHKERRQ(ierr);
1666 
1667     /* insert into matrix by row (this is why we cannot read directly into array) */
1668     vals_ptr = vals;
1669     for (i=0; i<m; i++) {
1670       for (j=0; j<N; j++) {
1671         array[i + j*m] = *vals_ptr++;
1672       }
1673     }
1674 
1675     /* read in the other processes' parts and ship them out */
1676     for (i=1; i<size; i++) {
1677       nz   = (rowners[i+1] - rowners[i])*N;
1678       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
1679       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1680     }
1681   } else {
1682     /* receive numeric values */
1683     ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);
1684 
1685     /* receive message of values */
1686     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1687 
1688     /* insert into matrix by row (this is why we cannot read directly into array) */
1689     vals_ptr = vals;
1690     for (i=0; i<m; i++) {
1691       for (j=0; j<N; j++) {
1692         array[i + j*m] = *vals_ptr++;
1693       }
1694     }
1695   }
1696   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
1697   ierr = PetscFree(vals);CHKERRQ(ierr);
1698   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1699   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1700   PetscFunctionReturn(0);
1701 }
1702 
1703 static PetscErrorCode MatLoad_MPIDense_Binary(Mat newmat,PetscViewer viewer)
1704 {
1705   Mat_MPIDense   *a;
1706   PetscScalar    *vals,*svals;
1707   MPI_Comm       comm;
1708   MPI_Status     status;
1709   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
1710   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1711   PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1712   PetscInt       i,nz,j,rstart,rend;
1713   int            fd;
1714   PetscErrorCode ierr;
1715 
1716   PetscFunctionBegin;
1717   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
1718   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1719   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1720   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1721   if (!rank) {
1722     ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
1723     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1724   }
1725   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1726   M    = header[1]; N = header[2]; nz = header[3];
1727 
1728   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
1729   if (newmat->rmap->N < 0) newmat->rmap->N = M;
1730   if (newmat->cmap->N < 0) newmat->cmap->N = N;
1731 
1732   if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
1733   if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);
1734 
1735   /*
1736        Handle case where matrix is stored on disk as a dense matrix
1737   */
1738   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1739     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1740     PetscFunctionReturn(0);
1741   }
1742 
1743   /* determine ownership of all rows */
1744   if (newmat->rmap->n < 0) {
1745     ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
1746   } else {
1747     ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
1748   }
1749   if (newmat->cmap->n < 0) {
1750     n = PETSC_DECIDE;
1751   } else {
1752     ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
1753   }
1754 
1755   ierr       = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
1756   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1757   rowners[0] = 0;
1758   for (i=2; i<=size; i++) {
1759     rowners[i] += rowners[i-1];
1760   }
1761   rstart = rowners[rank];
1762   rend   = rowners[rank+1];
1763 
1764   /* distribute row lengths to all processors */
1765   ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
1766   if (!rank) {
1767     ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
1768     ierr = PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);CHKERRQ(ierr);
1769     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
1770     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1771     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1772     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1773   } else {
1774     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1775   }
1776 
1777   if (!rank) {
1778     /* calculate the number of nonzeros on each processor */
1779     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
1780     for (i=0; i<size; i++) {
1781       for (j=rowners[i]; j< rowners[i+1]; j++) {
1782         procsnz[i] += rowlengths[j];
1783       }
1784     }
1785     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1786 
1787     /* determine max buffer needed and allocate it */
1788     maxnz = 0;
1789     for (i=0; i<size; i++) {
1790       maxnz = PetscMax(maxnz,procsnz[i]);
1791     }
1792     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
1793 
1794     /* read in my part of the matrix column indices  */
1795     nz   = procsnz[0];
1796     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
1797     ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
1798 
1799     /* read in everyone else's parts and ship them off */
1800     for (i=1; i<size; i++) {
1801       nz   = procsnz[i];
1802       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
1803       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1804     }
1805     ierr = PetscFree(cols);CHKERRQ(ierr);
1806   } else {
1807     /* determine buffer space needed for message */
1808     nz = 0;
1809     for (i=0; i<m; i++) {
1810       nz += ourlens[i];
1811     }
1812     ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);
1813 
1814     /* receive message of column indices */
1815     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1816     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1817     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1818   }
1819 
1820   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1821   a = (Mat_MPIDense*)newmat->data;
1822   if (!a->A) {
1823     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1824   }
1825 
1826   if (!rank) {
1827     ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);
1828 
1829     /* read in my part of the matrix numerical values  */
1830     nz   = procsnz[0];
1831     ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
1832 
1833     /* insert into matrix */
1834     jj      = rstart;
1835     smycols = mycols;
1836     svals   = vals;
1837     for (i=0; i<m; i++) {
1838       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1839       smycols += ourlens[i];
1840       svals   += ourlens[i];
1841       jj++;
1842     }
1843 
1844     /* read in the other processes' parts and ship them out */
1845     for (i=1; i<size; i++) {
1846       nz   = procsnz[i];
1847       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
1848       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
1849     }
1850     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1851   } else {
1852     /* receive numeric values */
1853     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
1854 
1855     /* receive message of values */
1856     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
1857     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1858     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1859 
1860     /* insert into matrix */
1861     jj      = rstart;
1862     smycols = mycols;
1863     svals   = vals;
1864     for (i=0; i<m; i++) {
1865       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1866       smycols += ourlens[i];
1867       svals   += ourlens[i];
1868       jj++;
1869     }
1870   }
1871   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1872   ierr = PetscFree(vals);CHKERRQ(ierr);
1873   ierr = PetscFree(mycols);CHKERRQ(ierr);
1874   ierr = PetscFree(rowners);CHKERRQ(ierr);
1875 
1876   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1877   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1878   PetscFunctionReturn(0);
1879 }
1880 
1881 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
1882 {
1883   PetscErrorCode ierr;
1884   PetscBool      isbinary;
1885 #if defined(PETSC_HAVE_HDF5)
1886   PetscBool      ishdf5;
1887 #endif
1888 
1889   PetscFunctionBegin;
1890   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
1891   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
1892   /* force binary viewer to load .info file if it has not yet done so */
1893   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1894   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1895 #if defined(PETSC_HAVE_HDF5)
1896   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
1897 #endif
1898   if (isbinary) {
1899     ierr = MatLoad_MPIDense_Binary(newMat,viewer);CHKERRQ(ierr);
1900 #if defined(PETSC_HAVE_HDF5)
1901   } else if (ishdf5) {
1902     ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
1903 #endif
1904   } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
1905   PetscFunctionReturn(0);
1906 }
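
/*
   Example (an illustrative sketch, not from this file): loading an MPIDENSE matrix
   from a PETSc binary file; the file name is hypothetical.

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/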
1907 
1908 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1909 {
1910   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1911   Mat            a,b;
1912   PetscBool      flg;
1913   PetscErrorCode ierr;
1914 
1915   PetscFunctionBegin;
1916   a    = matA->A;
1917   b    = matB->A;
1918   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1919   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1920   PetscFunctionReturn(0);
1921 }
1922 
1923 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1924 {
1925   PetscErrorCode        ierr;
1926   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1927   Mat_TransMatMultDense *atb = a->atbdense;
1928 
1929   PetscFunctionBegin;
1930   ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
1931   ierr = (atb->destroy)(A);CHKERRQ(ierr);
1932   ierr = PetscFree(atb);CHKERRQ(ierr);
1933   PetscFunctionReturn(0);
1934 }
1935 
1936 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
1937 {
1938   PetscErrorCode        ierr;
1939   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1940   Mat_MatTransMultDense *abt = a->abtdense;
1941 
1942   PetscFunctionBegin;
1943   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
1944   ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
1945   ierr = (abt->destroy)(A);CHKERRQ(ierr);
1946   ierr = PetscFree(abt);CHKERRQ(ierr);
1947   PetscFunctionReturn(0);
1948 }
1949 
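/*
   C = A^T * B for two MPIDENSE matrices with matching row layouts: each process first
   forms the full (global columns of A) x (global columns of B) product of its local
   row blocks with one sequential BLAS gemm, then MPI_Reduce_scatter() sums these
   partial products across all processes while handing each process the row block of
   C that it owns.
*/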
1950 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1951 {
1952   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1953   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1954   Mat_TransMatMultDense *atb = c->atbdense;
1955   PetscErrorCode ierr;
1956   MPI_Comm       comm;
1957   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1958   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1959   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1960   PetscScalar    _DOne=1.0,_DZero=0.0;
1961   PetscBLASInt   am,an,bn,aN;
1962   const PetscInt *ranges;
1963 
1964   PetscFunctionBegin;
1965   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1966   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1967   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1968 
1969   /* compute atbarray = aseq^T * bseq */
1970   ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
1971   ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
1972   ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
1973   ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
1974   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1975 
1976   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
1977   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1978 
1979   /* arrange atbarray into sendbuf */
1980   k = 0;
1981   for (proc=0; proc<size; proc++) {
1982     for (j=0; j<cN; j++) {
1983       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1984     }
1985   }
1986   /* sum all atbarray to local values of C */
1987   ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
1988   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
1989   ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
1990   PetscFunctionReturn(0);
1991 }
1992 
1993 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1994 {
1995   PetscErrorCode        ierr;
1996   Mat                   Cdense;
1997   MPI_Comm              comm;
1998   PetscMPIInt           size;
1999   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
2000   Mat_MPIDense          *c;
2001   Mat_TransMatMultDense *atb;
2002 
2003   PetscFunctionBegin;
2004   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2005   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2006     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2007   }
2008 
2009   /* create matrix product Cdense */
2010   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
2011   ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2012   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
2013   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
2014   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2015   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2016   *C   = Cdense;
2017 
2018   /* create data structure so Cdense can be reused */
2019   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2020   ierr = PetscNew(&atb);CHKERRQ(ierr);
2021   cM = Cdense->rmap->N;
2022   ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);
2023 
2024   c                    = (Mat_MPIDense*)Cdense->data;
2025   c->atbdense          = atb;
2026   atb->destroy         = Cdense->ops->destroy;
2027   Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2028   PetscFunctionReturn(0);
2029 }
2030 
2031 PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
2032 {
2033   PetscErrorCode ierr;
2034 
2035   PetscFunctionBegin;
2036   if (scall == MAT_INITIAL_MATRIX) {
2037     ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
2038   }
2039   ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
2040   PetscFunctionReturn(0);
2041 }
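
/*
   Example (an illustrative sketch, not from this file): computing C = A^T * B for two
   MPIDENSE matrices.  The first call creates C and computes the product; subsequent
   calls with MAT_REUSE_MATRIX recompute it in place after A or B change values.

     Mat C;
     ierr = MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatTransposeMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/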
2042 
2043 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat *C)
2044 {
2045   PetscErrorCode        ierr;
2046   Mat                   Cdense;
2047   MPI_Comm              comm;
2048   PetscMPIInt           i, size;
2049   PetscInt              maxRows, bufsiz;
2050   Mat_MPIDense          *c;
2051   PetscMPIInt           tag;
2052   const char            *algTypes[2] = {"allgatherv","cyclic"};
2053   PetscInt              alg, nalg = 2;
2054   Mat_MatTransMultDense *abt;
2055 
2056   PetscFunctionBegin;
2057   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2058   alg = 0; /* default is allgatherv */
2059   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
2060   ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[0],&alg,NULL);CHKERRQ(ierr);
2061   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2062   if (A->cmap->N != B->cmap->N) {
2063     SETERRQ2(comm,PETSC_ERR_ARG_SIZ,"Matrix global column dimensions are incompatible, A (%D) != B (%D)",A->cmap->N,B->cmap->N);
2064   }
2065 
2066   /* create matrix product Cdense */
2067   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
2068   ierr = MatSetSizes(Cdense,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
2069   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
2070   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
2071   ierr = PetscObjectGetNewTag((PetscObject)Cdense, &tag);CHKERRQ(ierr);
2072   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2073   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2074   *C   = Cdense;
2075 
2076   /* create data structure so Cdense can be reused */
2077   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2078   ierr = PetscNew(&abt);CHKERRQ(ierr);
2079   abt->tag = tag;
2080   abt->alg = alg;
2081   switch (alg) {
2082   case 1:
2083     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2084     bufsiz = A->cmap->N * maxRows;
2085     ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
2086     break;
2087   default:
2088     ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
2089     ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
2090     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2091     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2092     break;
2093   }
2094 
2095   c                    = (Mat_MPIDense*)Cdense->data;
2096   c->abtdense          = abt;
2097   abt->destroy         = Cdense->ops->destroy;
2098   Cdense->ops->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2099   PetscFunctionReturn(0);
2100 }
2101 
2102 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
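/*
   Cyclic variant of C = A * B^T: the transposed local row block of B travels around
   the processes in a ring.  In each of the 'size' steps a process multiplies its local
   rows of A against the block it currently holds (one gemm per step, writing the
   column block of C owned by that block's origin) while the next block is exchanged
   with nonblocking MPI_Isend()/MPI_Irecv(), overlapping communication and computation.
*/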
2103 {
2104   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2105   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
2106   Mat_SeqDense   *cseq=(Mat_SeqDense*)(c->A)->data;
2107   Mat_MatTransMultDense *abt = c->abtdense;
2108   PetscErrorCode ierr;
2109   MPI_Comm       comm;
2110   PetscMPIInt    rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2111   PetscScalar    *sendbuf, *recvbuf=0, *carray;
2112   PetscInt       i,cK=A->cmap->N,k,j,bn;
2113   PetscScalar    _DOne=1.0,_DZero=0.0;
2114   PetscBLASInt   cm, cn, ck;
2115   MPI_Request    reqs[2];
2116   const PetscInt *ranges;
2117 
2118   PetscFunctionBegin;
2119   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2120   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2121   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2122 
2123   ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
2124   bn = B->rmap->n;
2125   if (bseq->lda == bn) {
2126     sendbuf = bseq->v;
2127   } else {
2128     sendbuf = abt->buf[0];
2129     for (k = 0, i = 0; i < cK; i++) {
2130       for (j = 0; j < bn; j++, k++) {
2131         sendbuf[k] = bseq->v[i * bseq->lda + j];
2132       }
2133     }
2134   }
2135   if (size > 1) {
2136     sendto = (rank + size - 1) % size;
2137     recvfrom = (rank + size + 1) % size;
2138   } else {
2139     sendto = recvfrom = 0;
2140   }
2141   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2142   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2143   recvisfrom = rank;
2144   for (i = 0; i < size; i++) {
2145     /* we have finished receiving and sending, so the bufs can be read/modified */
2146     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
2147     PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2148 
2149     if (nextrecvisfrom != rank) {
2150       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
2151       sendsiz = cK * bn;
2152       recvsiz = cK * nextbn;
2153       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2154       ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
2155       ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
2156     }
2157 
2158     /* local aseq * sendbuf^T */
2159     ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
2160     carray = &cseq->v[cseq->lda * ranges[recvisfrom]];
2161     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,sendbuf,&cn,&_DZero,carray,&cseq->lda));
2162 
2163     if (nextrecvisfrom != rank) {
2164       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2165       ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2166     }
2167     bn = nextbn;
2168     recvisfrom = nextrecvisfrom;
2169     sendbuf = recvbuf;
2170   }
2171   PetscFunctionReturn(0);
2172 }
2173 
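/*
   Allgatherv variant of C = A * B^T (the default): each process packs the transpose of
   its local rows of B, MPI_Allgatherv() assembles the complete transposed B on every
   process, and a single local gemm then yields the owned rows of C.  Simpler than the
   cyclic variant, at the cost of an O(K * N) receive buffer per process, where K is
   the common column count and N the global row count of B.
*/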
2174 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2175 {
2176   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2177   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
2178   Mat_SeqDense   *cseq=(Mat_SeqDense*)(c->A)->data;
2179   Mat_MatTransMultDense *abt = c->abtdense;
2180   PetscErrorCode ierr;
2181   MPI_Comm       comm;
2182   PetscMPIInt    rank,size;
2183   PetscScalar    *sendbuf, *recvbuf;
2184   PetscInt       i,cK=A->cmap->N,k,j,bn;
2185   PetscScalar    _DOne=1.0,_DZero=0.0;
2186   PetscBLASInt   cm, cn, ck;
2187 
2188   PetscFunctionBegin;
2189   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2190   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2191   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2192 
2193   /* copy transpose of B into buf[0] */
2194   bn      = B->rmap->n;
2195   sendbuf = abt->buf[0];
2196   recvbuf = abt->buf[1];
2197   for (k = 0, j = 0; j < bn; j++) {
2198     for (i = 0; i < cK; i++, k++) {
2199       sendbuf[k] = bseq->v[i * bseq->lda + j];
2200     }
2201   }
2202   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
2203   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2204   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2205   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
2206   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,recvbuf,&ck,&_DZero,cseq->v,&cseq->lda));
2207   PetscFunctionReturn(0);
2208 }
2209 
2210 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2211 {
2212   Mat_MPIDense   *c=(Mat_MPIDense*)C->data;
2213   Mat_MatTransMultDense *abt = c->abtdense;
2214   PetscErrorCode ierr;
2215 
2216   PetscFunctionBegin;
2217   switch (abt->alg) {
2218   case 1:
2219     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2220     break;
2221   default:
2222     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2223     break;
2224   }
2225   PetscFunctionReturn(0);
2226 }
2227 
2228 static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat A,Mat B, MatReuse scall, PetscReal fill, Mat *C)
2229 {
2230   PetscErrorCode ierr;
2231 
2232   PetscFunctionBegin;
2233   if (scall == MAT_INITIAL_MATRIX) {
2234     ierr = MatMatTransposeMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
2235   }
2236   ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
2237   PetscFunctionReturn(0);
2238 }
2239 
2240 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
2241 {
2242   PetscErrorCode   ierr;
2243   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
2244   Mat_MatMultDense *ab = a->abdense;
2245 
2246   PetscFunctionBegin;
2247   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2248   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2249   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2250 
2251   ierr = (ab->destroy)(A);CHKERRQ(ierr);
2252   ierr = PetscFree(ab);CHKERRQ(ierr);
2253   PetscFunctionReturn(0);
2254 }
2255 
2256 #if defined(PETSC_HAVE_ELEMENTAL)
2257 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2258 {
2259   PetscErrorCode   ierr;
2260   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
2261   Mat_MatMultDense *ab=c->abdense;
2262 
2263   PetscFunctionBegin;
2264   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
2265   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
2266   ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
2267   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
2268   PetscFunctionReturn(0);
2269 }
2270 
2271 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
2272 {
2273   PetscErrorCode   ierr;
2274   Mat              Ae,Be,Ce;
2275   Mat_MPIDense     *c;
2276   Mat_MatMultDense *ab;
2277 
2278   PetscFunctionBegin;
2279   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2280     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
2281   }
2282 
2283   /* convert A and B to Elemental matrices Ae and Be */
2284   ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr);
2285   ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr);
2286 
2287   /* Ce = Ae*Be */
2288   ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
2289   ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);
2290 
2291   /* convert Ce to C */
2292   ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
2293 
2294   /* create data structure so the product matrix can be reused */
2295   ierr = PetscNew(&ab);CHKERRQ(ierr);
2296   c                  = (Mat_MPIDense*)(*C)->data;
2297   c->abdense         = ab;
2298 
2299   ab->Ae             = Ae;
2300   ab->Be             = Be;
2301   ab->Ce             = Ce;
2302   ab->destroy        = (*C)->ops->destroy;
2303   (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
2304   (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2305   PetscFunctionReturn(0);
2306 }
2307 
2308 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
2309 {
2310   PetscErrorCode ierr;
2311 
2312   PetscFunctionBegin;
2313   if (scall == MAT_INITIAL_MATRIX) { /* symbolic product includes numeric product */
2314     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
2315     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
2316     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
2317   } else {
2318     ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
2319     ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
2320     ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
2321   }
2322   PetscFunctionReturn(0);
2323 }
2324 #endif
2325 
2326