xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 8cc058d9cd56c1ccb3be12a47760ddfc446aaffc)
1 
2 /*
3    Basic functions for parallel dense matrices.
4 */
5 
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatDenseGetLocalMatrix"
11 /*@
12 
13       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the inner sequential
14               matrix that stores the locally owned portion of the operator. For a sequential dense matrix it returns the matrix itself.
15 
16     Input Parameter:
17 .      A - the MATSEQDENSE or MATMPIDENSE matrix
18 
19     Output Parameter:
20 .      B - the inner matrix
21 
22     Level: intermediate
23 
24 @*/
25 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
26 {
27   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
28   PetscErrorCode ierr;
29   PetscBool      flg;
30 
31   PetscFunctionBegin;
32   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
33   if (flg) *B = mat->A;
34   else *B = A;
35   PetscFunctionReturn(0);
36 }
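
/*
   Illustrative usage sketch (not part of the original source, kept out of the build
   with #if 0): obtaining the inner sequential matrix and operating on the local
   storage.  The helper name and the use of MatZeroEntries() are illustrative
   assumptions only.
*/
#if 0
static PetscErrorCode ZeroLocalDenseBlock(Mat A)
{
  Mat            Aloc;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr); /* Aloc is A itself for MATSEQDENSE */
  ierr = MatZeroEntries(Aloc);CHKERRQ(ierr);            /* the change is reflected in A's local block */
  PetscFunctionReturn(0);
}
#endif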
37 
38 #undef __FUNCT__
39 #define __FUNCT__ "MatGetRow_MPIDense"
40 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
41 {
42   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
43   PetscErrorCode ierr;
44   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
45 
46   PetscFunctionBegin;
47   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
48   lrow = row - rstart;
49   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
50   PetscFunctionReturn(0);
51 }
52 
53 #undef __FUNCT__
54 #define __FUNCT__ "MatRestoreRow_MPIDense"
55 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
56 {
57   PetscErrorCode ierr;
58 
59   PetscFunctionBegin;
60   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
61   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
62   PetscFunctionReturn(0);
63 }
64 
65 #undef __FUNCT__
66 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
67 PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
68 {
69   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
70   PetscErrorCode ierr;
71   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
72   PetscScalar    *array;
73   MPI_Comm       comm;
74   Mat            B;
75 
76   PetscFunctionBegin;
77   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
78 
79   ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
80   if (!B) {
81     ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
82     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
83     ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
84     ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
85     ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
86     ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
87     ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
88     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
89     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
90     ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
91     *a   = B;
92     ierr = MatDestroy(&B);CHKERRQ(ierr);
93   } else *a = B;
94   PetscFunctionReturn(0);
95 }
96 
97 #undef __FUNCT__
98 #define __FUNCT__ "MatSetValues_MPIDense"
99 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
100 {
101   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
102   PetscErrorCode ierr;
103   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
104   PetscBool      roworiented = A->roworiented;
105 
106   PetscFunctionBegin;
107   if (v) PetscValidScalarPointer(v,6);
108   for (i=0; i<m; i++) {
109     if (idxm[i] < 0) continue;
110     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
111     if (idxm[i] >= rstart && idxm[i] < rend) {
112       row = idxm[i] - rstart;
113       if (roworiented) {
114         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
115       } else {
116         for (j=0; j<n; j++) {
117           if (idxn[j] < 0) continue;
118           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
119           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
120         }
121       }
122     } else if (!A->donotstash) {
123       mat->assembled = PETSC_FALSE;
124       if (roworiented) {
125         ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
126       } else {
127         ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
128       }
129     }
130   }
131   PetscFunctionReturn(0);
132 }
133 
134 #undef __FUNCT__
135 #define __FUNCT__ "MatGetValues_MPIDense"
136 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
137 {
138   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
139   PetscErrorCode ierr;
140   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
141 
142   PetscFunctionBegin;
143   for (i=0; i<m; i++) {
144     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
145     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
146     if (idxm[i] >= rstart && idxm[i] < rend) {
147       row = idxm[i] - rstart;
148       for (j=0; j<n; j++) {
149         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
150         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
151         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
152       }
153     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
154   }
155   PetscFunctionReturn(0);
156 }
157 
158 #undef __FUNCT__
159 #define __FUNCT__ "MatDenseGetArray_MPIDense"
160 PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
161 {
162   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
163   PetscErrorCode ierr;
164 
165   PetscFunctionBegin;
166   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
167   PetscFunctionReturn(0);
168 }
169 
170 #undef __FUNCT__
171 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
172 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
173 {
174   Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
175   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
176   PetscErrorCode ierr;
177   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
178   const PetscInt *irow,*icol;
179   PetscScalar    *av,*bv,*v = lmat->v;
180   Mat            newmat;
181   IS             iscol_local;
182 
183   PetscFunctionBegin;
184   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
185   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
186   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
187   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
188   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
189   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
190 
191   /* No parallel redistribution is currently supported! Should really check each index set
192      to confirm that it is OK.  ... Currently only supports submatrices with the same row
193      partitioning as the original matrix! */
194 
195   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
196   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
197 
198   /* Check submatrix call */
199   if (scall == MAT_REUSE_MATRIX) {
200     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
201     /* Really need to test the row and column sizes! */
202     newmat = *B;
203   } else {
204     /* Create and fill new matrix */
205     ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
206     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
207     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
208     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
209   }
210 
211   /* Now extract the data pointers and do the copy, column at a time */
212   newmatd = (Mat_MPIDense*)newmat->data;
213   bv      = ((Mat_SeqDense*)newmatd->A->data)->v;
214 
215   for (i=0; i<Ncols; i++) {
216     av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
217     for (j=0; j<nrows; j++) {
218       *bv++ = av[irow[j] - rstart];
219     }
220   }
221 
222   /* Assemble the matrices so that the correct flags are set */
223   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
224   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
225 
226   /* Free work space */
227   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
228   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
229   ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
230   *B   = newmat;
231   PetscFunctionReturn(0);
232 }
233 
234 #undef __FUNCT__
235 #define __FUNCT__ "MatDenseRestoreArray_MPIDense"
236 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
237 {
238   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
239   PetscErrorCode ierr;
240 
241   PetscFunctionBegin;
242   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
243   PetscFunctionReturn(0);
244 }
245 
246 #undef __FUNCT__
247 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
248 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
249 {
250   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
251   MPI_Comm       comm;
252   PetscErrorCode ierr;
253   PetscInt       nstash,reallocs;
254   InsertMode     addv;
255 
256   PetscFunctionBegin;
257   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
258   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
259   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
260   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
261   mat->insertmode = addv; /* in case this processor had no cache */
262 
263   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
264   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
265   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
266   PetscFunctionReturn(0);
267 }
268 
269 #undef __FUNCT__
270 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
271 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
272 {
273   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
274   PetscErrorCode ierr;
275   PetscInt       i,*row,*col,flg,j,rstart,ncols;
276   PetscMPIInt    n;
277   PetscScalar    *val;
278   InsertMode     addv=mat->insertmode;
279 
280   PetscFunctionBegin;
281   /*  wait on receives */
282   while (1) {
283     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
284     if (!flg) break;
285 
286     for (i=0; i<n;) {
287       /* Now identify the consecutive vals belonging to the same row */
288       for (j=i,rstart=row[j]; j<n; j++) {
289         if (row[j] != rstart) break;
290       }
291       if (j < n) ncols = j-i;
292       else       ncols = n-i;
293       /* Now assemble all these values with a single function call */
294       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
295       i    = j;
296     }
297   }
298   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
299 
300   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
301   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
302 
303   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
304     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
305   }
306   PetscFunctionReturn(0);
307 }
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "MatZeroEntries_MPIDense"
311 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
312 {
313   PetscErrorCode ierr;
314   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
315 
316   PetscFunctionBegin;
317   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
318   PetscFunctionReturn(0);
319 }
320 
321 /* the code does not do the diagonal entries correctly unless the
322    matrix is square and the column and row ownerships are identical.
323    This is a BUG. The only way to fix it seems to be to access
324    mdn->A and mdn->B directly and not through the MatZeroRows()
325    routine.
326 */
327 #undef __FUNCT__
328 #define __FUNCT__ "MatZeroRows_MPIDense"
329 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
330 {
331   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
332   PetscErrorCode    ierr;
333   PetscInt          i,*owners = A->rmap->range;
334   PetscInt          *nprocs,j,idx,nsends;
335   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
336   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
337   PetscInt          *lens,*lrows,*values;
338   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
339   MPI_Comm          comm;
340   MPI_Request       *send_waits,*recv_waits;
341   MPI_Status        recv_status,*send_status;
342   PetscBool         found;
343   const PetscScalar *xx;
344   PetscScalar       *bb;
345 
346   PetscFunctionBegin;
347   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
348   if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
349   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
350   /*  first count number of contributors to each processor */
351   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
352   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
353   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);  /* see note*/
354   for (i=0; i<N; i++) {
355     idx   = rows[i];
356     found = PETSC_FALSE;
357     for (j=0; j<size; j++) {
358       if (idx >= owners[j] && idx < owners[j+1]) {
359         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
360       }
361     }
362     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
363   }
364   nsends = 0;
365   for (i=0; i<size; i++) nsends += nprocs[2*i+1];
366 
367   /* inform other processors of number of messages and max length*/
368   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
369 
370   /* post receives:   */
371   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
372   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
373   for (i=0; i<nrecvs; i++) {
374     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
375   }
376 
377   /* do sends:
378       1) starts[i] gives the starting index in svalues for stuff going to
379          the ith processor
380   */
381   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
382   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
383   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
384 
385   starts[0] = 0;
386   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
387   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
388 
389   starts[0] = 0;
390   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
391   count = 0;
392   for (i=0; i<size; i++) {
393     if (nprocs[2*i+1]) {
394       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
395     }
396   }
397   ierr = PetscFree(starts);CHKERRQ(ierr);
398 
399   base = owners[rank];
400 
401   /*  wait on receives */
402   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
403   count = nrecvs;
404   slen  = 0;
405   while (count) {
406     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
407     /* unpack receives into our local space */
408     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
409 
410     source[imdex] = recv_status.MPI_SOURCE;
411     lens[imdex]   = n;
412     slen += n;
413     count--;
414   }
415   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
416 
417   /* move the received data into the list of local rows to be zeroed */
418   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
419   count = 0;
420   for (i=0; i<nrecvs; i++) {
421     values = rvalues + i*nmax;
422     for (j=0; j<lens[i]; j++) {
423       lrows[count++] = values[j] - base;
424     }
425   }
426   ierr = PetscFree(rvalues);CHKERRQ(ierr);
427   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
428   ierr = PetscFree(owner);CHKERRQ(ierr);
429   ierr = PetscFree(nprocs);CHKERRQ(ierr);
430 
431   /* fix right hand side if needed */
432   if (x && b) {
433     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
434     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
435     for (i=0; i<slen; i++) {
436       bb[lrows[i]] = diag*xx[lrows[i]];
437     }
438     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
439     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
440   }
441 
442   /* actually zap the local rows */
443   ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
444   if (diag != 0.0) {
445     Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
446     PetscInt     m   = ll->lda, i;
447 
448     for (i=0; i<slen; i++) {
449       ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
450     }
451   }
452   ierr = PetscFree(lrows);CHKERRQ(ierr);
453 
454   /* wait on sends */
455   if (nsends) {
456     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
457     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
458     ierr = PetscFree(send_status);CHKERRQ(ierr);
459   }
460   ierr = PetscFree(send_waits);CHKERRQ(ierr);
461   ierr = PetscFree(svalues);CHKERRQ(ierr);
462   PetscFunctionReturn(0);
463 }
464 
465 #undef __FUNCT__
466 #define __FUNCT__ "MatMult_MPIDense"
467 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
468 {
469   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
470   PetscErrorCode ierr;
471 
472   PetscFunctionBegin;
473   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
474   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
475   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatMultAdd_MPIDense"
481 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
482 {
483   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
484   PetscErrorCode ierr;
485 
486   PetscFunctionBegin;
487   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
488   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
489   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
490   PetscFunctionReturn(0);
491 }
492 
493 #undef __FUNCT__
494 #define __FUNCT__ "MatMultTranspose_MPIDense"
495 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
496 {
497   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
498   PetscErrorCode ierr;
499   PetscScalar    zero = 0.0;
500 
501   PetscFunctionBegin;
502   ierr = VecSet(yy,zero);CHKERRQ(ierr);
503   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
504   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
505   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
506   PetscFunctionReturn(0);
507 }
508 
509 #undef __FUNCT__
510 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
511 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
512 {
513   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
514   PetscErrorCode ierr;
515 
516   PetscFunctionBegin;
517   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
518   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
519   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
520   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
521   PetscFunctionReturn(0);
522 }
523 
524 #undef __FUNCT__
525 #define __FUNCT__ "MatGetDiagonal_MPIDense"
526 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
527 {
528   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
529   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
530   PetscErrorCode ierr;
531   PetscInt       len,i,n,m = A->rmap->n,radd;
532   PetscScalar    *x,zero = 0.0;
533 
534   PetscFunctionBegin;
535   ierr = VecSet(v,zero);CHKERRQ(ierr);
536   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
537   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
538   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
539   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
540   radd = A->rmap->rstart*m;
541   for (i=0; i<len; i++) {
542     x[i] = aloc->v[radd + i*m + i];
543   }
544   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
545   PetscFunctionReturn(0);
546 }
547 
548 #undef __FUNCT__
549 #define __FUNCT__ "MatDestroy_MPIDense"
550 PetscErrorCode MatDestroy_MPIDense(Mat mat)
551 {
552   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
553   PetscErrorCode ierr;
554 
555   PetscFunctionBegin;
556 #if defined(PETSC_USE_LOG)
557   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
558 #endif
559   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
560   ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
561   ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
562   ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);
563 
564   ierr = PetscFree(mat->data);CHKERRQ(ierr);
565   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
566   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr);
567   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",NULL);CHKERRQ(ierr);
568   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",NULL);CHKERRQ(ierr);
569   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",NULL);CHKERRQ(ierr);
570   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",NULL);CHKERRQ(ierr);
571   PetscFunctionReturn(0);
572 }
573 
574 #undef __FUNCT__
575 #define __FUNCT__ "MatView_MPIDense_Binary"
576 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
577 {
578   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
579   PetscErrorCode    ierr;
580   PetscViewerFormat format;
581   int               fd;
582   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
583   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
584   PetscScalar       *work,*v,*vv;
585   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
586 
587   PetscFunctionBegin;
588   if (mdn->size == 1) {
589     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
590   } else {
591     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
592     ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
593     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
594 
595     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
596     if (format == PETSC_VIEWER_NATIVE) {
597 
598       if (!rank) {
599         /* store the matrix as a dense matrix */
600         header[0] = MAT_FILE_CLASSID;
601         header[1] = mat->rmap->N;
602         header[2] = N;
603         header[3] = MATRIX_BINARY_FORMAT_DENSE;
604         ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
605 
606         /* get largest work array needed for transposing array */
607         mmax = mat->rmap->n;
608         for (i=1; i<size; i++) {
609           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
610         }
611         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
612 
613         /* write out local array, by rows */
614         m = mat->rmap->n;
615         v = a->v;
616         for (j=0; j<N; j++) {
617           for (i=0; i<m; i++) {
618             work[j + i*N] = *v++;
619           }
620         }
621         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
622         /* get largest work array to receive messages from other processes, excludes process zero */
623         mmax = 0;
624         for (i=1; i<size; i++) {
625           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
626         }
627         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
628         for (k = 1; k < size; k++) {
629           v    = vv;
630           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
631           ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
632 
633           for (j = 0; j < N; j++) {
634             for (i = 0; i < m; i++) {
635               work[j + i*N] = *v++;
636             }
637           }
638           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
639         }
640         ierr = PetscFree(work);CHKERRQ(ierr);
641         ierr = PetscFree(vv);CHKERRQ(ierr);
642       } else {
643         ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
644       }
645     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
646   }
647   PetscFunctionReturn(0);
648 }
649 
650 #include <petscdraw.h>
651 #undef __FUNCT__
652 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
653 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
654 {
655   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
656   PetscErrorCode    ierr;
657   PetscMPIInt       size = mdn->size,rank = mdn->rank;
658   PetscViewerType   vtype;
659   PetscBool         iascii,isdraw;
660   PetscViewer       sviewer;
661   PetscViewerFormat format;
662 
663   PetscFunctionBegin;
664   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
665   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
666   if (iascii) {
667     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
668     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
669     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
670       MatInfo info;
671       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
672       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
673       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
674       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
675       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
676       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
677       PetscFunctionReturn(0);
678     } else if (format == PETSC_VIEWER_ASCII_INFO) {
679       PetscFunctionReturn(0);
680     }
681   } else if (isdraw) {
682     PetscDraw draw;
683     PetscBool isnull;
684 
685     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
686     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
687     if (isnull) PetscFunctionReturn(0);
688   }
689 
690   if (size == 1) {
691     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
692   } else {
693     /* assemble the entire matrix onto first processor. */
694     Mat         A;
695     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
696     PetscInt    *cols;
697     PetscScalar *vals;
698 
699     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
700     if (!rank) {
701       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
702     } else {
703       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
704     }
705     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
706     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
707     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
708     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
709 
710     /* Copy the matrix ... This isn't the most efficient means,
711        but it's quick for now */
712     A->insertmode = INSERT_VALUES;
713 
714     row = mat->rmap->rstart;
715     m   = mdn->A->rmap->n;
716     for (i=0; i<m; i++) {
717       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
718       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
719       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
720       row++;
721     }
722 
723     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
724     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
725     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
726     if (!rank) {
727       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
728       /* Set the type name to MATMPIDENSE so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII() */
729       ierr = PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE);CHKERRQ(ierr);
730       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
731     }
732     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
733     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
734     ierr = MatDestroy(&A);CHKERRQ(ierr);
735   }
736   PetscFunctionReturn(0);
737 }
738 
739 #undef __FUNCT__
740 #define __FUNCT__ "MatView_MPIDense"
741 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
742 {
743   PetscErrorCode ierr;
744   PetscBool      iascii,isbinary,isdraw,issocket;
745 
746   PetscFunctionBegin;
747   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
748   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
749   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
750   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
751 
752   if (iascii || issocket || isdraw) {
753     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
754   } else if (isbinary) {
755     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
756   }
757   PetscFunctionReturn(0);
758 }
759 
760 #undef __FUNCT__
761 #define __FUNCT__ "MatGetInfo_MPIDense"
762 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
763 {
764   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
765   Mat            mdn  = mat->A;
766   PetscErrorCode ierr;
767   PetscReal      isend[5],irecv[5];
768 
769   PetscFunctionBegin;
770   info->block_size = 1.0;
771 
772   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
773 
774   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
775   isend[3] = info->memory;  isend[4] = info->mallocs;
776   if (flag == MAT_LOCAL) {
777     info->nz_used      = isend[0];
778     info->nz_allocated = isend[1];
779     info->nz_unneeded  = isend[2];
780     info->memory       = isend[3];
781     info->mallocs      = isend[4];
782   } else if (flag == MAT_GLOBAL_MAX) {
783     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
784 
785     info->nz_used      = irecv[0];
786     info->nz_allocated = irecv[1];
787     info->nz_unneeded  = irecv[2];
788     info->memory       = irecv[3];
789     info->mallocs      = irecv[4];
790   } else if (flag == MAT_GLOBAL_SUM) {
791     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
792 
793     info->nz_used      = irecv[0];
794     info->nz_allocated = irecv[1];
795     info->nz_unneeded  = irecv[2];
796     info->memory       = irecv[3];
797     info->mallocs      = irecv[4];
798   }
799   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
800   info->fill_ratio_needed = 0;
801   info->factor_mallocs    = 0;
802   PetscFunctionReturn(0);
803 }
804 
805 #undef __FUNCT__
806 #define __FUNCT__ "MatSetOption_MPIDense"
807 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
808 {
809   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
810   PetscErrorCode ierr;
811 
812   PetscFunctionBegin;
813   switch (op) {
814   case MAT_NEW_NONZERO_LOCATIONS:
815   case MAT_NEW_NONZERO_LOCATION_ERR:
816   case MAT_NEW_NONZERO_ALLOCATION_ERR:
817     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
818     break;
819   case MAT_ROW_ORIENTED:
820     a->roworiented = flg;
821 
822     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
823     break;
824   case MAT_NEW_DIAGONALS:
825   case MAT_KEEP_NONZERO_PATTERN:
826   case MAT_USE_HASH_TABLE:
827     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
828     break;
829   case MAT_IGNORE_OFF_PROC_ENTRIES:
830     a->donotstash = flg;
831     break;
832   case MAT_SYMMETRIC:
833   case MAT_STRUCTURALLY_SYMMETRIC:
834   case MAT_HERMITIAN:
835   case MAT_SYMMETRY_ETERNAL:
836   case MAT_IGNORE_LOWER_TRIANGULAR:
837     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
838     break;
839   default:
840     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
841   }
842   PetscFunctionReturn(0);
843 }
844 
845 
846 #undef __FUNCT__
847 #define __FUNCT__ "MatDiagonalScale_MPIDense"
848 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
849 {
850   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
851   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
852   PetscScalar    *l,*r,x,*v;
853   PetscErrorCode ierr;
854   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
855 
856   PetscFunctionBegin;
857   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
858   if (ll) {
859     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
860     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D.", s2a, s2);
861     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
862     for (i=0; i<m; i++) {
863       x = l[i];
864       v = mat->v + i;
865       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
866     }
867     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
868     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
869   }
870   if (rr) {
871     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
872     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector non-conforming local size, %D != %D.", s3a, s3);
873     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
874     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
875     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
876     for (i=0; i<n; i++) {
877       x = r[i];
878       v = mat->v + i*m;
879       for (j=0; j<m; j++) (*v++) *= x;
880     }
881     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
882     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
883   }
884   PetscFunctionReturn(0);
885 }
886 
887 #undef __FUNCT__
888 #define __FUNCT__ "MatNorm_MPIDense"
889 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
890 {
891   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
892   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
893   PetscErrorCode ierr;
894   PetscInt       i,j;
895   PetscReal      sum = 0.0;
896   PetscScalar    *v  = mat->v;
897 
898   PetscFunctionBegin;
899   if (mdn->size == 1) {
900     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
901   } else {
902     if (type == NORM_FROBENIUS) {
903       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
904         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
905       }
906       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
907       *nrm = PetscSqrtReal(*nrm);
908       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
909     } else if (type == NORM_1) {
910       PetscReal *tmp,*tmp2;
911       ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
912       ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
913       ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
914       *nrm = 0.0;
915       v    = mat->v;
916       for (j=0; j<mdn->A->cmap->n; j++) {
917         for (i=0; i<mdn->A->rmap->n; i++) {
918           tmp[j] += PetscAbsScalar(*v);  v++;
919         }
920       }
921       ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
922       for (j=0; j<A->cmap->N; j++) {
923         if (tmp2[j] > *nrm) *nrm = tmp2[j];
924       }
925       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
926       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
927     } else if (type == NORM_INFINITY) { /* max row norm */
928       PetscReal ntemp;
929       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
930       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
931     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
932   }
933   PetscFunctionReturn(0);
934 }
935 
936 #undef __FUNCT__
937 #define __FUNCT__ "MatTranspose_MPIDense"
938 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
939 {
940   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
941   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
942   Mat            B;
943   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
944   PetscErrorCode ierr;
945   PetscInt       j,i;
946   PetscScalar    *v;
947 
948   PetscFunctionBegin;
949   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place");
950   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
951     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
952     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
953     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
954     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
955   } else {
956     B = *matout;
957   }
958 
959   m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
960   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
961   for (i=0; i<m; i++) rwork[i] = rstart + i;
962   for (j=0; j<n; j++) {
963     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
964     v   += m;
965   }
966   ierr = PetscFree(rwork);CHKERRQ(ierr);
967   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
968   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
969   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
970     *matout = B;
971   } else {
972     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
973   }
974   PetscFunctionReturn(0);
975 }
976 
977 
978 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
979 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
980 
981 #undef __FUNCT__
982 #define __FUNCT__ "MatSetUp_MPIDense"
983 PetscErrorCode MatSetUp_MPIDense(Mat A)
984 {
985   PetscErrorCode ierr;
986 
987   PetscFunctionBegin;
988   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
989   PetscFunctionReturn(0);
990 }
991 
992 #undef __FUNCT__
993 #define __FUNCT__ "MatAXPY_MPIDense"
994 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
995 {
996   PetscErrorCode ierr;
997   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
998 
999   PetscFunctionBegin;
1000   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1001   PetscFunctionReturn(0);
1002 }
1003 
1004 #undef __FUNCT__
1005 #define __FUNCT__ "MatConjugate_MPIDense"
1006 PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1007 {
1008   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1009   PetscErrorCode ierr;
1010 
1011   PetscFunctionBegin;
1012   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1013   PetscFunctionReturn(0);
1014 }
1015 
1016 #undef __FUNCT__
1017 #define __FUNCT__ "MatRealPart_MPIDense"
1018 PetscErrorCode MatRealPart_MPIDense(Mat A)
1019 {
1020   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1021   PetscErrorCode ierr;
1022 
1023   PetscFunctionBegin;
1024   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1025   PetscFunctionReturn(0);
1026 }
1027 
1028 #undef __FUNCT__
1029 #define __FUNCT__ "MatImaginaryPart_MPIDense"
1030 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1031 {
1032   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1033   PetscErrorCode ierr;
1034 
1035   PetscFunctionBegin;
1036   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1037   PetscFunctionReturn(0);
1038 }
1039 
1040 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1041 #undef __FUNCT__
1042 #define __FUNCT__ "MatGetColumnNorms_MPIDense"
1043 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1044 {
1045   PetscErrorCode ierr;
1046   PetscInt       i,n;
1047   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1048   PetscReal      *work;
1049 
1050   PetscFunctionBegin;
1051   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1052   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
1053   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1054   if (type == NORM_2) {
1055     for (i=0; i<n; i++) work[i] *= work[i];
1056   }
1057   if (type == NORM_INFINITY) {
1058     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1059   } else {
1060     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1061   }
1062   ierr = PetscFree(work);CHKERRQ(ierr);
1063   if (type == NORM_2) {
1064     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1065   }
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 #undef __FUNCT__
1070 #define __FUNCT__ "MatSetRandom_MPIDense"
1071 static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1072 {
1073   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1074   PetscErrorCode ierr;
1075   PetscScalar    *a;
1076   PetscInt       m,n,i;
1077 
1078   PetscFunctionBegin;
1079   ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
1080   ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
1081   for (i=0; i<m*n; i++) {
1082     ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
1083   }
1084   ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 /* -------------------------------------------------------------------*/
1089 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1090                                         MatGetRow_MPIDense,
1091                                         MatRestoreRow_MPIDense,
1092                                         MatMult_MPIDense,
1093                                 /*  4*/ MatMultAdd_MPIDense,
1094                                         MatMultTranspose_MPIDense,
1095                                         MatMultTransposeAdd_MPIDense,
1096                                         0,
1097                                         0,
1098                                         0,
1099                                 /* 10*/ 0,
1100                                         0,
1101                                         0,
1102                                         0,
1103                                         MatTranspose_MPIDense,
1104                                 /* 15*/ MatGetInfo_MPIDense,
1105                                         MatEqual_MPIDense,
1106                                         MatGetDiagonal_MPIDense,
1107                                         MatDiagonalScale_MPIDense,
1108                                         MatNorm_MPIDense,
1109                                 /* 20*/ MatAssemblyBegin_MPIDense,
1110                                         MatAssemblyEnd_MPIDense,
1111                                         MatSetOption_MPIDense,
1112                                         MatZeroEntries_MPIDense,
1113                                 /* 24*/ MatZeroRows_MPIDense,
1114                                         0,
1115                                         0,
1116                                         0,
1117                                         0,
1118                                 /* 29*/ MatSetUp_MPIDense,
1119                                         0,
1120                                         0,
1121                                         0,
1122                                         0,
1123                                 /* 34*/ MatDuplicate_MPIDense,
1124                                         0,
1125                                         0,
1126                                         0,
1127                                         0,
1128                                 /* 39*/ MatAXPY_MPIDense,
1129                                         MatGetSubMatrices_MPIDense,
1130                                         0,
1131                                         MatGetValues_MPIDense,
1132                                         0,
1133                                 /* 44*/ 0,
1134                                         MatScale_MPIDense,
1135                                         0,
1136                                         0,
1137                                         0,
1138                                 /* 49*/ MatSetRandom_MPIDense,
1139                                         0,
1140                                         0,
1141                                         0,
1142                                         0,
1143                                 /* 54*/ 0,
1144                                         0,
1145                                         0,
1146                                         0,
1147                                         0,
1148                                 /* 59*/ MatGetSubMatrix_MPIDense,
1149                                         MatDestroy_MPIDense,
1150                                         MatView_MPIDense,
1151                                         0,
1152                                         0,
1153                                 /* 64*/ 0,
1154                                         0,
1155                                         0,
1156                                         0,
1157                                         0,
1158                                 /* 69*/ 0,
1159                                         0,
1160                                         0,
1161                                         0,
1162                                         0,
1163                                 /* 74*/ 0,
1164                                         0,
1165                                         0,
1166                                         0,
1167                                         0,
1168                                 /* 79*/ 0,
1169                                         0,
1170                                         0,
1171                                         0,
1172                                 /* 83*/ MatLoad_MPIDense,
1173                                         0,
1174                                         0,
1175                                         0,
1176                                         0,
1177                                         0,
1178                                 /* 89*/
1179                                         0,
1180                                         0,
1181                                         0,
1182                                         0,
1183                                         0,
1184                                 /* 94*/ 0,
1185                                         0,
1186                                         0,
1187                                         0,
1188                                         0,
1189                                 /* 99*/ 0,
1190                                         0,
1191                                         0,
1192                                         MatConjugate_MPIDense,
1193                                         0,
1194                                 /*104*/ 0,
1195                                         MatRealPart_MPIDense,
1196                                         MatImaginaryPart_MPIDense,
1197                                         0,
1198                                         0,
1199                                 /*109*/ 0,
1200                                         0,
1201                                         0,
1202                                         0,
1203                                         0,
1204                                 /*114*/ 0,
1205                                         0,
1206                                         0,
1207                                         0,
1208                                         0,
1209                                 /*119*/ 0,
1210                                         0,
1211                                         0,
1212                                         0,
1213                                         0,
1214                                 /*124*/ 0,
1215                                         MatGetColumnNorms_MPIDense
1216 };
1217 
1218 #undef __FUNCT__
1219 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1220 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1221 {
1222   Mat_MPIDense   *a;
1223   PetscErrorCode ierr;
1224 
1225   PetscFunctionBegin;
1226   mat->preallocated = PETSC_TRUE;
1227   /* Note:  For now, when data is specified, this assumes the user correctly
1228    allocates the local dense storage space.  We should add error checking. */
1229 
1230   a       = (Mat_MPIDense*)mat->data;
1231   ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1232   ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1233   a->nvec = mat->cmap->n;
1234 
1235   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1236   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1237   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1238   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1239   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1240   PetscFunctionReturn(0);
1241 }
1242 
1243 #undef __FUNCT__
1244 #define __FUNCT__ "MatCreate_MPIDense"
1245 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1246 {
1247   Mat_MPIDense   *a;
1248   PetscErrorCode ierr;
1249 
1250   PetscFunctionBegin;
1251   ierr      = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1252   mat->data = (void*)a;
1253   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1254 
1255   mat->insertmode = NOT_SET_VALUES;
1256   ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
1257   ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);
1258 
1259   /* build cache (stash) for off-processor entries set before assembly */
1260   a->donotstash = PETSC_FALSE;
1261 
1262   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1263 
1264   /* stuff used for matrix vector multiply */
1265   a->lvec        = 0;
1266   a->Mvctx       = 0;
1267   a->roworiented = PETSC_TRUE;
1268 
1269   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C","MatDenseGetArray_MPIDense",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1270   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C","MatDenseRestoreArray_MPIDense",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1271 
1272   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","MatGetDiagonalBlock_MPIDense",MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1273   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C","MatMPIDenseSetPreallocation_MPIDense",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1274   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","MatMatMult_MPIAIJ_MPIDense",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1275   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","MatMatMultSymbolic_MPIAIJ_MPIDense",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1276   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","MatMatMultNumeric_MPIAIJ_MPIDense",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1277   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1278   PetscFunctionReturn(0);
1279 }
1280 
1281 /*MC
1282    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1283 
1284    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1285    and MATMPIDENSE otherwise.
1286 
1287    Options Database Keys:
1288 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1289 
1290   Level: beginner
1291 
1292 
1293 .seealso: MatCreateDense(), MATSEQDENSE, MATMPIDENSE
1294 M*/
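
/*
   Illustrative usage sketch (not part of the original source, kept out of the build
   with #if 0): a common way the "dense" type is selected, either in code or from the
   options database via -mat_type dense.  The helper name and the 100 x 100 global
   size are placeholder assumptions.
*/
#if 0
static PetscErrorCode CreateDenseExample(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATDENSE);CHKERRQ(ierr);   /* becomes MATSEQDENSE or MATMPIDENSE */
  ierr = MatSetFromOptions(*A);CHKERRQ(ierr);     /* honors -mat_type dense */
  ierr = MatSetUp(*A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif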
1295 
1296 #undef __FUNCT__
1297 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1298 /*@C
1299    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1300 
1301    Not collective
1302 
1303    Input Parameters:
1304 +  A - the matrix
1305 -  data - optional location of matrix data.  Set data=NULL for PETSc
1306    to control all matrix memory allocation.
1307 
1308    Notes:
1309    The dense format is fully compatible with standard Fortran 77
1310    storage by columns.
1311 
1312    The data input variable is intended primarily for Fortran programmers
1313    who wish to allocate their own matrix memory space.  Most users should
1314    set data=NULL.
1315 
1316    Level: intermediate
1317 
1318 .keywords: matrix,dense, parallel
1319 
1320 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1321 @*/
1322 PetscErrorCode  MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1323 {
1324   PetscErrorCode ierr;
1325 
1326   PetscFunctionBegin;
1327   ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(mat,data));CHKERRQ(ierr);
1328   PetscFunctionReturn(0);
1329 }
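
/*
   Illustrative usage sketch (not part of the original source, kept out of the build
   with #if 0): a typical preallocation sequence for a MATMPIDENSE matrix, passing
   data = NULL so that PETSc allocates the local column-major storage.  The helper
   name and its parameters are placeholder assumptions.
*/
#if 0
static PetscErrorCode PreallocateMPIDenseExample(MPI_Comm comm,PetscInt m,PetscInt N,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,PETSC_DECIDE,PETSC_DETERMINE,N);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*A,NULL);CHKERRQ(ierr); /* PETSc manages the array */
  PetscFunctionReturn(0);
}
#endif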
1330 
1331 #undef __FUNCT__
1332 #define __FUNCT__ "MatCreateDense"
1333 /*@C
1334    MatCreateDense - Creates a parallel matrix in dense format.
1335 
1336    Collective on MPI_Comm
1337 
1338    Input Parameters:
1339 +  comm - MPI communicator
1340 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1341 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1342 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1343 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1344 -  data - optional location of matrix data.  Set data=NULL (NULL_SCALAR for Fortran users) for PETSc
1345    to control all matrix memory allocation.
1346 
1347    Output Parameter:
1348 .  A - the matrix
1349 
1350    Notes:
1351    The dense format is fully compatible with standard Fortran 77
1352    storage by columns.
1353 
1354    The data input variable is intended primarily for Fortran programmers
1355    who wish to allocate their own matrix memory space.  Most users should
1356    set data=NULL (NULL_SCALAR for Fortran users).
1357 
1358    The user MUST specify either the local or global matrix dimensions
1359    (possibly both).
1360 
1361    Level: intermediate
1362 
1363 .keywords: matrix,dense, parallel
1364 
1365 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1366 @*/
1367 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1368 {
1369   PetscErrorCode ierr;
1370   PetscMPIInt    size;
1371 
1372   PetscFunctionBegin;
1373   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1374   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1375   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1376   if (size > 1) {
1377     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1378     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1379   } else {
1380     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1381     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1382   }
1383   PetscFunctionReturn(0);
1384 }
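/*
   A minimal usage sketch (the 100 x 50 sizes are placeholders): MatCreateDense() bundles the
   create/set-sizes/set-type/preallocate sequence above into a single call.

      Mat A;
      ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,50,NULL,&A);CHKERRQ(ierr);
      ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
      ierr = MatDestroy(&A);CHKERRQ(ierr);
*/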
1385 
1386 #undef __FUNCT__
1387 #define __FUNCT__ "MatDuplicate_MPIDense"
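/*
   Duplicates an MPIDENSE matrix: a new Mat with the same layout and function table is created,
   the bookkeeping fields are copied, and the local sequential dense block is duplicated
   (its values are copied only when cpvalues is MAT_COPY_VALUES).
*/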
1388 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1389 {
1390   Mat            mat;
1391   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1392   PetscErrorCode ierr;
1393 
1394   PetscFunctionBegin;
1395   *newmat = 0;
1396   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1397   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1398   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1399   a       = (Mat_MPIDense*)mat->data;
1400   ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1401 
1402   mat->factortype   = A->factortype;
1403   mat->assembled    = PETSC_TRUE;
1404   mat->preallocated = PETSC_TRUE;
1405 
1406   a->size         = oldmat->size;
1407   a->rank         = oldmat->rank;
1408   mat->insertmode = NOT_SET_VALUES;
1409   a->nvec         = oldmat->nvec;
1410   a->donotstash   = oldmat->donotstash;
1411 
1412   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1413   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1414 
1415   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1416   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1417   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1418 
1419   *newmat = mat;
1420   PetscFunctionReturn(0);
1421 }
1422 
1423 #undef __FUNCT__
1424 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
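/*
   Handles the case where the matrix is stored in the binary file in dense format: the file
   holds the values row by row, so rank 0 reads its own block of rows, ships each remaining
   block out with MPIULong_Send(), and every process copies its block into the column-major
   local array.
*/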
1425 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset)
1426 {
1427   PetscErrorCode ierr;
1428   PetscMPIInt    rank,size;
1429   PetscInt       *rowners,i,m,nz,j;
1430   PetscScalar    *array,*vals,*vals_ptr;
1431 
1432   PetscFunctionBegin;
1433   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1434   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1435 
1436   /* determine ownership of all rows */
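  /* default split: each process gets M/size rows, plus one extra row on the first M%size
     ranks; e.g. M=10 and size=3 gives local sizes 4,3,3 */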
1437   if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank);
1438   else m = newmat->rmap->n;
1439   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1440   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1441   rowners[0] = 0;
1442   for (i=2; i<=size; i++) {
1443     rowners[i] += rowners[i-1];
1444   }
1445 
1446   if (!sizesset) {
1447     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1448   }
1449   ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1450   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
1451 
1452   if (!rank) {
1453     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1454 
1455     /* read in my part of the matrix numerical values  */
1456     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1457 
1458     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
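    /* the local array is column-major (Fortran order), so entry (i,j) of this process's
       block of rows is stored at array[i + j*m] */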
1459     vals_ptr = vals;
1460     for (i=0; i<m; i++) {
1461       for (j=0; j<N; j++) {
1462         array[i + j*m] = *vals_ptr++;
1463       }
1464     }
1465 
1466     /* read in the other processors' parts and ship them out */
1467     for (i=1; i<size; i++) {
1468       nz   = (rowners[i+1] - rowners[i])*N;
1469       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1470       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1471     }
1472   } else {
1473     /* receive numeric values */
1474     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1475 
1476     /* receive message of values */
1477     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1478 
1479     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1480     vals_ptr = vals;
1481     for (i=0; i<m; i++) {
1482       for (j=0; j<N; j++) {
1483         array[i + j*m] = *vals_ptr++;
1484       }
1485     }
1486   }
1487   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
1488   ierr = PetscFree(rowners);CHKERRQ(ierr);
1489   ierr = PetscFree(vals);CHKERRQ(ierr);
1490   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1491   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1492   PetscFunctionReturn(0);
1493 }
1494 
1495 #undef __FUNCT__
1496 #define __FUNCT__ "MatLoad_MPIDense"
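/*
   Loads a MATMPIDENSE matrix from a PETSc binary viewer.  Rank 0 reads and broadcasts the
   header; if the file stores the matrix in dense format the work is delegated to
   MatLoad_MPIDense_DenseInFile(), otherwise rank 0 scatters the row lengths, then reads and
   ships each process's column indices and numerical values, and every process inserts its
   rows with MatSetValues() before the final assembly.
*/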
1497 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1498 {
1499   PetscScalar    *vals,*svals;
1500   MPI_Comm       comm;
1501   MPI_Status     status;
1502   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1503   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1504   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1505   PetscInt       i,nz,j,rstart,rend,sizesset=1,grows,gcols;
1506   int            fd;
1507   PetscErrorCode ierr;
1508 
1509   PetscFunctionBegin;
1510   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
1511   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1512   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1513   if (!rank) {
1514     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1515     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
1516     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1517   }
1518   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
1519 
1520   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1521   M    = header[1]; N = header[2]; nz = header[3];
1522 
1523   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
1524   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
1525   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
1526 
1527   /* If global sizes are set, check if they are consistent with that given in the file */
1528   if (sizesset) {
1529     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
1530   }
1531   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
1532   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
1533 
1534   /*
1535        Handle case where matrix is stored on disk as a dense matrix
1536   */
1537   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1538     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr);
1539     PetscFunctionReturn(0);
1540   }
1541 
1542   /* determine ownership of all rows */
1543   if (newmat->rmap->n < 0) {
1544     ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
1545   } else {
1546     ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
1547   }
1548   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
1549   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1550   rowners[0] = 0;
1551   for (i=2; i<=size; i++) {
1552     rowners[i] += rowners[i-1];
1553   }
1554   rstart = rowners[rank];
1555   rend   = rowners[rank+1];
1556 
1557   /* distribute row lengths to all processors */
1558   ierr = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr);
1559   if (!rank) {
1560     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1561     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1562     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1563     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1564     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1565     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1566   } else {
1567     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1568   }
1569 
1570   if (!rank) {
1571     /* calculate the number of nonzeros on each processor */
1572     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1573     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1574     for (i=0; i<size; i++) {
1575       for (j=rowners[i]; j< rowners[i+1]; j++) {
1576         procsnz[i] += rowlengths[j];
1577       }
1578     }
1579     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1580 
1581     /* determine max buffer needed and allocate it */
1582     maxnz = 0;
1583     for (i=0; i<size; i++) {
1584       maxnz = PetscMax(maxnz,procsnz[i]);
1585     }
1586     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1587 
1588     /* read in my part of the matrix column indices  */
1589     nz   = procsnz[0];
1590     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1591     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1592 
1593     /* read in everyone else's part and ship it off */
1594     for (i=1; i<size; i++) {
1595       nz   = procsnz[i];
1596       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1597       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1598     }
1599     ierr = PetscFree(cols);CHKERRQ(ierr);
1600   } else {
1601     /* determine buffer space needed for message */
1602     nz = 0;
1603     for (i=0; i<m; i++) {
1604       nz += ourlens[i];
1605     }
1606     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1607 
1608     /* receive message of column indices */
1609     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1610     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1611     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1612   }
1613 
1614   /* loop over local rows, determining number of off-diagonal entries */
1615   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1616   jj   = 0;
1617   for (i=0; i<m; i++) {
1618     for (j=0; j<ourlens[i]; j++) {
1619       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1620       jj++;
1621     }
1622   }
1623 
1624   /* create our matrix */
1625   for (i=0; i<m; i++) ourlens[i] -= offlens[i];
1626 
1627   if (!sizesset) {
1628     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1629   }
1630   ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1631   for (i=0; i<m; i++) ourlens[i] += offlens[i];
1632 
1633   if (!rank) {
1634     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1635 
1636     /* read in my part of the matrix numerical values  */
1637     nz   = procsnz[0];
1638     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1639 
1640     /* insert into matrix */
1641     jj      = rstart;
1642     smycols = mycols;
1643     svals   = vals;
1644     for (i=0; i<m; i++) {
1645       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1646       smycols += ourlens[i];
1647       svals   += ourlens[i];
1648       jj++;
1649     }
1650 
1651     /* read in the other processors' parts and ship them out */
1652     for (i=1; i<size; i++) {
1653       nz   = procsnz[i];
1654       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1655       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
1656     }
1657     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1658   } else {
1659     /* receive numeric values */
1660     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1661 
1662     /* receive message of values */
1663     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
1664     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1665     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1666 
1667     /* insert into matrix */
1668     jj      = rstart;
1669     smycols = mycols;
1670     svals   = vals;
1671     for (i=0; i<m; i++) {
1672       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1673       smycols += ourlens[i];
1674       svals   += ourlens[i];
1675       jj++;
1676     }
1677   }
1678   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
1679   ierr = PetscFree(vals);CHKERRQ(ierr);
1680   ierr = PetscFree(mycols);CHKERRQ(ierr);
1681   ierr = PetscFree(rowners);CHKERRQ(ierr);
1682 
1683   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1684   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1685   PetscFunctionReturn(0);
1686 }
1687 
1688 #undef __FUNCT__
1689 #define __FUNCT__ "MatEqual_MPIDense"
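/*
   Two MPIDENSE matrices are considered equal when every process finds its local dense block
   equal; the per-process results are combined with a logical AND reduction over the
   communicator.
*/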
1690 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1691 {
1692   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1693   Mat            a,b;
1694   PetscBool      flg;
1695   PetscErrorCode ierr;
1696 
1697   PetscFunctionBegin;
1698   a    = matA->A;
1699   b    = matB->A;
1700   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1701   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1702   PetscFunctionReturn(0);
1703 }
1704 
1705