xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision c4c812f100fac54d2d07a03feda19bf917b177bc)

/*
   Basic functions for parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
      MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential
              dense matrix that stores the locally owned rows; for a MATSEQDENSE matrix, returns the matrix itself.

    Input Parameter:
.      A - the sequential or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}
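
/*
   A minimal usage sketch for MatDenseGetLocalMatrix() (illustration only, not
   part of the PETSc sources; error checking omitted). The returned matrix is
   a borrowed reference, so the caller does not destroy it; MatDenseGetArray()
   then exposes the column-major storage of the locally owned rows:

      Mat         Aloc;
      PetscScalar *array;

      MatDenseGetLocalMatrix(A,&Aloc);
      MatDenseGetArray(Aloc,&array);
      ....
      MatDenseRestoreArray(Aloc,&array);
*/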

PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
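
/*
   A usage sketch for the routine above (illustration only; assumes A is a
   square, assembled MATMPIDENSE matrix). The returned block is the sequential
   dense submatrix of the locally owned rows and columns; it is cached on A
   under the key "DiagonalBlock", so the caller does not destroy it:

      Mat diag;

      MatGetDiagonalBlock(A,&diag);
      ....
*/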

PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
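
/*
   The stash path above is what makes off-process MatSetValues() work: entries
   destined for rows owned by another process are cached locally and shipped
   during assembly. A sketch of the calling pattern (illustration only; grow
   and gcols are global indices and may be owned by any process):

      MatSetValues(mat,1,&grow,n,gcols,vals,INSERT_VALUES);
      MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
*/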

PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;
  MPI_Comm       comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only a submatrix with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrix so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
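
/*
   Given the restriction noted above (no redistribution: the submatrix keeps
   the row partitioning of the original matrix), a typical call looks like the
   following sketch (illustration only; isrow selects locally owned rows and
   iscol the local share of the requested columns; error checking omitted):

      IS  isrow,iscol;
      Mat sub;

      ISCreateStride(PETSC_COMM_WORLD,nlocalrows,rstart,1,&isrow);
      ISCreateStride(PETSC_COMM_WORLD,nlocalcols,firstcol,1,&iscol);
      MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);
      ....
      MatDestroy(&sub);
      ISDestroy(&isrow);
      ISDestroy(&iscol);
*/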

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
  ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* The code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   the local matrix mdn->A directly and not go through the MatZeroRows()
   routine.
*/
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *sizes,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /*  first count number of contributors to each processor */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m   = ll->lda, i;

    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
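
/*
   A usage sketch for MatZeroRows() on this matrix type (illustration only;
   rows[] holds hypothetical global row indices). With diag = 1.0 the zeroed
   rows get 1.0 on the diagonal, and passing x and b additionally sets
   b[row] = 1.0*x[row]:

      PetscInt rows[] = {0,5};

      MatZeroRows(A,2,rows,1.0,x,b);
*/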

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

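/*
   All the matrix-vector products below follow the same pattern dictated by
   the storage scheme: each process holds the full rows it owns, a local dense
   block A_loc of size (rend-rstart) x N, so

       y_loc = A_loc * x_full

   where x_full is the entire input vector gathered into mdn->lvec by the
   scatter mdn->Mvctx. Transpose products run the same scatter in reverse with
   ADD_VALUES to accumulate the contributions from every process.
*/
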
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
  const PetscScalar *l,*r;
  PetscScalar       x,*v;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D",s2a,s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector non-conforming local size, %D != %D",s3a,s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v  = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
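
/*
   A usage sketch for the routine above (illustration only; error checking
   omitted). All three reuse modes are supported:

      Mat At;

      MatTranspose(A,MAT_INITIAL_MATRIX,&At);      creates At
      MatTranspose(A,MAT_REUSE_MATRIX,&At);        refills the existing At
      MatTranspose(A,MAT_INPLACE_MATRIX,&A);       replaces A by its transpose
      MatDestroy(&At);
*/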

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr =  MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode  MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
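
/*
   The reduction above combines the per-process column norms: NORM_1 values
   add directly across processes, NORM_INFINITY takes a max, and for NORM_2
   the local norms are squared first so that

       ||col_j||_2 = sqrt( sum over processes of ||col_j(local part)||_2^2 )
*/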

static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;
  PetscScalar    *a;
  PetscInt       m,n,i;

  PetscFunctionBegin;
  ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
  ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
  for (i=0; i<m*n; i++) {
    ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);

static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
#if defined(PETSC_HAVE_ELEMENTAL)
                                /* 89*/ MatMatMult_MPIDense_MPIDense,
                                        MatMatMultSymbolic_MPIDense_MPIDense,
#else
                                /* 89*/ 0,
                                        0,
#endif
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 99*/ 0,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        MatTransposeMatMult_MPIDense_MPIDense,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIDense
};

PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (mat->preallocated) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix has already been preallocated");
  mat->preallocated = PETSC_TRUE;
  /* Note: for now, when data is provided by the user, this assumes the user has correctly
     allocated the local dense storage space.  We should add error checking. */

  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
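
/*
   A minimal creation sketch (illustration only; m local rows are chosen by
   the caller and the global size is M x N; passing NULL lets PETSc allocate
   the storage, otherwise the user array must hold m*N entries, since each
   process stores all N columns of its rows):

      Mat A;

      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,m,PETSC_DECIDE,M,N);
      MatSetType(A,MATMPIDENSE);
      MatMPIDenseSetPreallocation(A,NULL);

   or, equivalently, in one call:

      MatCreateDense(PETSC_COMM_WORLD,m,PETSC_DECIDE,M,N,NULL,&A);
*/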

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* The PETSc-Elemental interface uses axpy for setting off-process entries, so only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
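
/*
   A usage sketch for the column access pair above (illustration only; col is
   a global column index, and vals points at the locally owned part of that
   column, i.e. A->rmap->n entries):

      PetscScalar *vals;

      MatDenseGetColumn(A,col,&vals);
      ....
      MatDenseRestoreColumn(A,&vals);
*/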
1328 
1329 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1330 {
1331   PetscErrorCode ierr;
1332   Mat_MPIDense   *mat;
1333   PetscInt       m,nloc,N;
1334 
1335   PetscFunctionBegin;
1336   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
1337   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
1338   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1339     PetscInt sum;
1340 
1341     if (n == PETSC_DECIDE) {
1342       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1343     }
1344     /* Check sum(n) = N */
1345     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1346     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1347 
1348     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
1349   }
1350 
1351   /* numeric phase */
1352   mat = (Mat_MPIDense*)(*outmat)->data;
1353   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1354   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1355   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1356   PetscFunctionReturn(0);
1357 }
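
/*
   Usage sketch (illustrative; mlocal, N and comm are placeholders): each
   process contributes its local rows as a MATSEQDENSE matrix and the routine
   above stacks them into one MATMPIDENSE matrix.  The local column count may
   be PETSC_DECIDE:

     Mat seq,mpi;
     ierr = MatCreateSeqDense(PETSC_COMM_SELF,mlocal,N,NULL,&seq);CHKERRQ(ierr);
     ... fill seq ...
     ierr = MatCreateMPIMatConcatenateSeqMat(comm,seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpi);CHKERRQ(ierr);
*/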
1358 
1359 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1360 {
1361   Mat_MPIDense   *a;
1362   PetscErrorCode ierr;
1363 
1364   PetscFunctionBegin;
1365   ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
1366   mat->data = (void*)a;
1367   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1368 
1369   mat->insertmode = NOT_SET_VALUES;
1370   ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
1371   ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);
1372 
1373   /* build cache for off-processor entries formed during assembly */
1374   a->donotstash = PETSC_FALSE;
1375 
1376   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1377 
1378   /* data structures used for matrix-vector multiply */
1379   a->lvec        = 0;
1380   a->Mvctx       = 0;
1381   a->roworiented = PETSC_TRUE;
1382 
1383   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1384   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1385   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
1386   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
1387   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
1388   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
1389 #if defined(PETSC_HAVE_ELEMENTAL)
1390   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
1391 #endif
1392   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1393   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1394   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1395   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1396 
1397   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1398   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1399   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1400   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
1401   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
1402   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 /*MC
1407    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1408 
1409    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1410    and MATMPIDENSE otherwise.
1411 
1412    Options Database Keys:
1413 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1414 
1415   Level: beginner
1416 
1417 
1418 .seealso: MatCreateDense(), MATSEQDENSE, MATMPIDENSE
1419 M*/
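
/*
   Usage sketch (illustrative; comm, M and N are placeholders): selecting
   MATDENSE at runtime lets the same code produce a MATSEQDENSE matrix on one
   process and a MATMPIDENSE matrix on several:

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
     ierr = MatSetUp(A);CHKERRQ(ierr);

   or equivalently MatSetFromOptions() combined with -mat_type dense.
*/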
1420 
1421 /*@C
1422    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1423 
1424    Not Collective
1425 
1426    Input Parameters:
1427 +  B - the matrix
1428 -  data - optional location of matrix data.  Set data=NULL for PETSc
1429    to control all matrix memory allocation.
1430 
1431    Notes:
1432    The dense format is fully compatible with standard Fortran 77
1433    storage by columns.
1434 
1435    The data input variable is intended primarily for Fortran programmers
1436    who wish to allocate their own matrix memory space.  Most users should
1437    set data=NULL.
1438 
1439    Level: intermediate
1440 
1441 .keywords: matrix, dense, parallel
1442 
1443 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1444 @*/
1445 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1446 {
1447   PetscErrorCode ierr;
1448 
1449   PetscFunctionBegin;
1450   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
1451   PetscFunctionReturn(0);
1452 }
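
/*
   Usage sketch (illustrative; m, n, M, N and comm are placeholders):
   supplying a user-provided array.  The array holds the local rows of ALL
   columns in column-major order (leading dimension m), so it needs at least
   m*N entries; pass NULL to let PETSc allocate the storage instead:

     PetscScalar *data;
     ierr = PetscMalloc1(m*N,&data);CHKERRQ(ierr);
     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(B,data);CHKERRQ(ierr);
*/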
1453 
1454 /*@
1455    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
1456    array provided by the user. This is useful to avoid copying an array
1457    into a matrix.
1458 
1459    Not Collective
1460 
1461    Input Parameters:
1462 +  mat - the matrix
1463 -  array - the array in column major order
1464 
1465    Notes:
1466    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1467    freed when the matrix is destroyed.
1468 
1469    Level: developer
1470 
1471 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1472 
1473 @*/
1474 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar array[])
1475 {
1476   PetscErrorCode ierr;
1477   PetscFunctionBegin;
1478   ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
1479   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1480   PetscFunctionReturn(0);
1481 }
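
/*
   Usage sketch (illustrative; myarray is a placeholder): wrap a user buffer
   without copying, then restore the original storage.  The placed array must
   match the matrix's local column-major layout:

     ierr = MatDensePlaceArray(mat,myarray);CHKERRQ(ierr);
     ... operations on mat now read and write myarray ...
     ierr = MatDenseResetArray(mat);CHKERRQ(ierr);
*/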
1482 
1483 /*@
1484    MatDenseResetArray - Resets the matrix array to the array it previously had before the call to MatDensePlaceArray()
1485 
1486    Not Collective
1487 
1488    Input Parameter:
1489 .  mat - the matrix
1490 
1491    Notes:
1492    You can only call this after a call to MatDensePlaceArray()
1493 
1494    Level: developer
1495 
1496 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1497 
1498 @*/
1499 PetscErrorCode  MatDenseResetArray(Mat mat)
1500 {
1501   PetscErrorCode ierr;
1502   PetscFunctionBegin;
1503   ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
1504   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1505   PetscFunctionReturn(0);
1506 }
1507 
1508 /*@C
1509    MatCreateDense - Creates a parallel matrix in dense format.
1510 
1511    Collective on MPI_Comm
1512 
1513    Input Parameters:
1514 +  comm - MPI communicator
1515 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1516 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1517 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1518 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1519 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1520    to control all matrix memory allocation.
1521 
1522    Output Parameter:
1523 .  A - the matrix
1524 
1525    Notes:
1526    The dense format is fully compatible with standard Fortran 77
1527    storage by columns.
1528 
1529    The data input variable is intended primarily for Fortran programmers
1530    who wish to allocate their own matrix memory space.  Most users should
1531    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1532 
1533    The user MUST specify either the local or global matrix dimensions
1534    (possibly both).
1535 
1536    Level: intermediate
1537 
1538 .keywords: matrix, dense, parallel
1539 
1540 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1541 @*/
1542 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1543 {
1544   PetscErrorCode ierr;
1545   PetscMPIInt    size;
1546 
1547   PetscFunctionBegin;
1548   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1549   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1550   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1551   if (size > 1) {
1552     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1553     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1554     if (data) {  /* user provided data array, so no need to assemble */
1555       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
1556       (*A)->assembled = PETSC_TRUE;
1557     }
1558   } else {
1559     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1560     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1561   }
1562   PetscFunctionReturn(0);
1563 }
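
/*
   Usage sketch (illustrative; M, N, row, col and val are placeholders):
   create a dense matrix with PETSc-managed storage and fill it entry by
   entry:

     Mat A;
     ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);CHKERRQ(ierr);
     ierr = MatSetValues(A,1,&row,1,&col,&val,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/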
1564 
1565 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1566 {
1567   Mat            mat;
1568   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1569   PetscErrorCode ierr;
1570 
1571   PetscFunctionBegin;
1572   *newmat = 0;
1573   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1574   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1575   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1576   a       = (Mat_MPIDense*)mat->data;
1577   ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1578 
1579   mat->factortype   = A->factortype;
1580   mat->assembled    = PETSC_TRUE;
1581   mat->preallocated = PETSC_TRUE;
1582 
1583   a->size         = oldmat->size;
1584   a->rank         = oldmat->rank;
1585   mat->insertmode = NOT_SET_VALUES;
1586   a->nvec         = oldmat->nvec;
1587   a->donotstash   = oldmat->donotstash;
1588 
1589   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1590   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1591 
1592   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1593   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1594   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1595 
1596   *newmat = mat;
1597   PetscFunctionReturn(0);
1598 }
1599 
1600 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
1601 {
1602   PetscErrorCode ierr;
1603   PetscMPIInt    rank,size;
1604   const PetscInt *rowners;
1605   PetscInt       i,m,n,nz,j,mMax;
1606   PetscScalar    *array,*vals,*vals_ptr;
1607   Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;
1608 
1609   PetscFunctionBegin;
1610   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1611   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1612 
1613   /* determine ownership of rows and columns */
1614   m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
1615   n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;
1616 
1617   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1618   if (!a->A) {
1619     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1620   }
1621   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
1622   ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
1623   ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
1624   ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
1625   if (!rank) {
1626     ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);
1627 
1628     /* read in my part of the matrix numerical values  */
1629     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1630 
1631     /* insert into the matrix row by row (this is why we cannot read directly into array) */
1632     vals_ptr = vals;
1633     for (i=0; i<m; i++) {
1634       for (j=0; j<N; j++) {
1635         array[i + j*m] = *vals_ptr++;
1636       }
1637     }
1638 
1639     /* read in the other processes' parts and ship them out */
1640     for (i=1; i<size; i++) {
1641       nz   = (rowners[i+1] - rowners[i])*N;
1642       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1643       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1644     }
1645   } else {
1646     /* receive numeric values */
1647     ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);
1648 
1649     /* receive message of values */
1650     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1651 
1652     /* insert into the matrix row by row (this is why we cannot read directly into array) */
1653     vals_ptr = vals;
1654     for (i=0; i<m; i++) {
1655       for (j=0; j<N; j++) {
1656         array[i + j*m] = *vals_ptr++;
1657       }
1658     }
1659   }
1660   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
1661   ierr = PetscFree(vals);CHKERRQ(ierr);
1662   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1663   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1664   PetscFunctionReturn(0);
1665 }
1666 
1667 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1668 {
1669   Mat_MPIDense   *a;
1670   PetscScalar    *vals,*svals;
1671   MPI_Comm       comm;
1672   MPI_Status     status;
1673   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
1674   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1675   PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1676   PetscInt       i,nz,j,rstart,rend;
1677   int            fd;
1678   PetscBool      isbinary;
1679   PetscErrorCode ierr;
1680 
1681   PetscFunctionBegin;
1682   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1683   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);
1684 
1685   /* force binary viewer to load .info file if it has not yet done so */
1686   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1687   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
1688   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1689   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1690   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1691   if (!rank) {
1692     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
1693     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1694   }
1695   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1696   M    = header[1]; N = header[2]; nz = header[3];
1697 
1698   /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
1699   if (newmat->rmap->N < 0) newmat->rmap->N = M;
1700   if (newmat->cmap->N < 0) newmat->cmap->N = N;
1701 
1702   if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
1703   if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);
1704 
1705   /*
1706        Handle case where matrix is stored on disk as a dense matrix
1707   */
1708   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1709     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1710     PetscFunctionReturn(0);
1711   }
1712 
1713   /* determine ownership of all rows */
1714   if (newmat->rmap->n < 0) {
1715     ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
1716   } else {
1717     ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
1718   }
1719   if (newmat->cmap->n < 0) {
1720     n = PETSC_DECIDE;
1721   } else {
1722     ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
1723   }
1724 
1725   ierr       = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
1726   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1727   rowners[0] = 0;
1728   for (i=2; i<=size; i++) {
1729     rowners[i] += rowners[i-1];
1730   }
1731   rstart = rowners[rank];
1732   rend   = rowners[rank+1];
1733 
1734   /* distribute row lengths to all processors */
1735   ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
1736   if (!rank) {
1737     ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
1738     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1739     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
1740     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1741     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1742     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1743   } else {
1744     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1745   }
1746 
1747   if (!rank) {
1748     /* calculate the number of nonzeros on each processor */
1749     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
1750     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1751     for (i=0; i<size; i++) {
1752       for (j=rowners[i]; j< rowners[i+1]; j++) {
1753         procsnz[i] += rowlengths[j];
1754       }
1755     }
1756     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1757 
1758     /* determine max buffer needed and allocate it */
1759     maxnz = 0;
1760     for (i=0; i<size; i++) {
1761       maxnz = PetscMax(maxnz,procsnz[i]);
1762     }
1763     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
1764 
1765     /* read in my part of the matrix column indices  */
1766     nz   = procsnz[0];
1767     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
1768     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1769 
1770     /* read in everyone else's part and ship it off */
1771     for (i=1; i<size; i++) {
1772       nz   = procsnz[i];
1773       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1774       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1775     }
1776     ierr = PetscFree(cols);CHKERRQ(ierr);
1777   } else {
1778     /* determine buffer space needed for message */
1779     nz = 0;
1780     for (i=0; i<m; i++) {
1781       nz += ourlens[i];
1782     }
1783     ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);
1784 
1785     /* receive message of column indices*/
1786     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1787     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1788     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Received count does not match expected count; the file may be corrupt");
1789   }
1790 
1791   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1792   a = (Mat_MPIDense*)newmat->data;
1793   if (!a->A) {
1794     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1795   }
1796 
1797   if (!rank) {
1798     ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);
1799 
1800     /* read in my part of the matrix numerical values  */
1801     nz   = procsnz[0];
1802     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1803 
1804     /* insert into matrix */
1805     jj      = rstart;
1806     smycols = mycols;
1807     svals   = vals;
1808     for (i=0; i<m; i++) {
1809       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1810       smycols += ourlens[i];
1811       svals   += ourlens[i];
1812       jj++;
1813     }
1814 
1815     /* read in the other processes' parts and ship them out */
1816     for (i=1; i<size; i++) {
1817       nz   = procsnz[i];
1818       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1819       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
1820     }
1821     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1822   } else {
1823     /* receive numeric values */
1824     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
1825 
1826     /* receive message of values */
1827     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
1828     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1829     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Received count does not match expected count; the file may be corrupt");
1830 
1831     /* insert into matrix */
1832     jj      = rstart;
1833     smycols = mycols;
1834     svals   = vals;
1835     for (i=0; i<m; i++) {
1836       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1837       smycols += ourlens[i];
1838       svals   += ourlens[i];
1839       jj++;
1840     }
1841   }
1842   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1843   ierr = PetscFree(vals);CHKERRQ(ierr);
1844   ierr = PetscFree(mycols);CHKERRQ(ierr);
1845   ierr = PetscFree(rowners);CHKERRQ(ierr);
1846 
1847   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1848   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1849   PetscFunctionReturn(0);
1850 }
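
/*
   Usage sketch (illustrative; "matrix.dat" is a placeholder filename):
   MatLoad_MPIDense() is reached through MatLoad() with a binary viewer:

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/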
1851 
1852 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1853 {
1854   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1855   Mat            a,b;
1856   PetscBool      flg;
1857   PetscErrorCode ierr;
1858 
1859   PetscFunctionBegin;
1860   a    = matA->A;
1861   b    = matB->A;
1862   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1863   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1864   PetscFunctionReturn(0);
1865 }
1866 
1867 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1868 {
1869   PetscErrorCode        ierr;
1870   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1871   Mat_TransMatMultDense *atb = a->atbdense;
1872 
1873   PetscFunctionBegin;
1874   ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
1875   ierr = (atb->destroy)(A);CHKERRQ(ierr);
1876   ierr = PetscFree(atb);CHKERRQ(ierr);
1877   PetscFunctionReturn(0);
1878 }
1879 
1880 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1881 {
1882   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1883   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1884   Mat_TransMatMultDense *atb = c->atbdense;
1885   PetscErrorCode ierr;
1886   MPI_Comm       comm;
1887   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1888   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1889   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1890   PetscScalar    _DOne=1.0,_DZero=0.0;
1891   PetscBLASInt   am,an,bn,aN;
1892   const PetscInt *ranges;
1893 
1894   PetscFunctionBegin;
1895   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1896   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1897   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1898 
1899   /* compute atbarray = aseq^T * bseq */
1900   ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
1901   ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
1902   ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
1903   ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
1904   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1905 
1906   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
1907   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1908 
1909   /* arrange atbarray into sendbuf */
1910   k = 0;
1911   for (proc=0; proc<size; proc++) {
1912     for (j=0; j<cN; j++) {
1913       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1914     }
1915   }
1916   /* sum all atbarray to local values of C */
1917   ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
1918   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
1919   ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
1920   PetscFunctionReturn(0);
1921 }
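
/*
   Structure of the numeric phase above (a sketch of the algebra): with row
   blocks A_p and B_p owned by process p,

     C = A^T B = \sum_p A_p^T B_p

   so each process forms its full cM-by-cN contribution with a single GEMM,
   and MPI_Reduce_scatter() both sums the contributions over all processes
   and leaves process p holding only its own rows ranges[p]..ranges[p+1]-1
   of C, packed column-major in block p of sendbuf.
*/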
1922 
1923 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1924 {
1925   PetscErrorCode        ierr;
1926   Mat                   Cdense;
1927   MPI_Comm              comm;
1928   PetscMPIInt           size;
1929   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
1930   Mat_MPIDense          *c;
1931   Mat_TransMatMultDense *atb;
1932 
1933   PetscFunctionBegin;
1934   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1935   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1936     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1937   }
1938 
1939   /* create matrix product Cdense */
1940   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
1941   ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
1942   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
1943   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
1944   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1945   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1946   *C   = Cdense;
1947 
1948   /* create data structure so Cdense can be reused */
1949   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1950   ierr = PetscNew(&atb);CHKERRQ(ierr);
1951   cM = Cdense->rmap->N;
1952   ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);
1953 
1954   c                    = (Mat_MPIDense*)Cdense->data;
1955   c->atbdense          = atb;
1956   atb->destroy         = Cdense->ops->destroy;
1957   Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
1958   PetscFunctionReturn(0);
1959 }
1960 
1961 PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1962 {
1963   PetscErrorCode ierr;
1964 
1965   PetscFunctionBegin;
1966   if (scall == MAT_INITIAL_MATRIX) {
1967     ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1968   }
1969   ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1970   PetscFunctionReturn(0);
1971 }
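
/*
   Usage sketch (illustrative): A and B must have identical row layouts, as
   checked in the symbolic phase above:

     Mat C;
     ierr = MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ... change values of A or B, keeping the layouts ...
     ierr = MatTransposeMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/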
1972 
1973 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
1974 {
1975   PetscErrorCode   ierr;
1976   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
1977   Mat_MatMultDense *ab = a->abdense;
1978 
1979   PetscFunctionBegin;
1980   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
1981   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
1982   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
1983 
1984   ierr = (ab->destroy)(A);CHKERRQ(ierr);
1985   ierr = PetscFree(ab);CHKERRQ(ierr);
1986   PetscFunctionReturn(0);
1987 }
1988 
1989 #if defined(PETSC_HAVE_ELEMENTAL)
1990 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1991 {
1992   PetscErrorCode   ierr;
1993   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
1994   Mat_MatMultDense *ab=c->abdense;
1995 
1996   PetscFunctionBegin;
1997   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
1998   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
1999   ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
2000   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
2001   PetscFunctionReturn(0);
2002 }
2003 
2004 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
2005 {
2006   PetscErrorCode   ierr;
2007   Mat              Ae,Be,Ce;
2008   Mat_MPIDense     *c;
2009   Mat_MatMultDense *ab;
2010 
2011   PetscFunctionBegin;
2012   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2013     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
2014   }
2015 
2016   /* convert A and B to Elemental matrices Ae and Be */
2017   ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr);
2018   ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr);
2019 
2020   /* Ce = Ae*Be */
2021   ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
2022   ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);
2023 
2024   /* convert Ce to C */
2025   ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
2026 
2027   /* create data structure so Cdense can be reused */
2028   ierr = PetscNew(&ab);CHKERRQ(ierr);
2029   c                  = (Mat_MPIDense*)(*C)->data;
2030   c->abdense         = ab;
2031 
2032   ab->Ae             = Ae;
2033   ab->Be             = Be;
2034   ab->Ce             = Ce;
2035   ab->destroy        = (*C)->ops->destroy;
2036   (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
2037   (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2038   PetscFunctionReturn(0);
2039 }
2040 
2041 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
2042 {
2043   PetscErrorCode ierr;
2044 
2045   PetscFunctionBegin;
2046   if (scall == MAT_INITIAL_MATRIX) { /* symbolic product includes the numeric product */
2047     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
2048     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
2049     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
2050   } else {
2051     ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
2052     ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
2053     ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
2054   }
2055   PetscFunctionReturn(0);
2056 }
2057 #endif
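
/*
   Usage sketch (illustrative): dense-dense MatMatMult() is routed through the
   Elemental-based routines above, so it requires a PETSc build configured
   with Elemental:

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/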
2058 
2059