xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 454a90a3eb7bca6958262e5eca1eb393ad97e108)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpidense.c,v 1.125 1999/09/20 19:35:08 bsmith Exp bsmith $";
3 #endif
4 
5 /*
6    Basic functions for parallel dense matrices.
7 */
8 
9 #include "src/mat/impls/dense/mpi/mpidense.h"
10 #include "src/vec/vecimpl.h"
11 
12 EXTERN_C_BEGIN
13 #undef __FUNC__
14 #define __FUNC__ "MatGetDiagonalBlock_MPIDense"
15 int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
16 {
17   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
18   int          m = mdn->m,rstart = mdn->rstart,rank,ierr;
19   Scalar       *array;
20   MPI_Comm     comm;
21 
22   PetscFunctionBegin;
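  /*
     The local part of an MPIDense matrix holds all N columns of its m local
     rows, stored column-major in mdn->A.  The diagonal block is therefore the
     m x m block whose columns begin at global column rstart, i.e. at offset
     m*rstart in the local array; a sequential dense matrix is wrapped around
     that storage below (no copy of the values is made).
  */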
23   if (mdn->M != mdn->N) SETERRQ(PETSC_ERR_SUP,0,"Only square matrices supported.");
24 
25   /* The reuse aspect is not implemented efficiently */
26   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
27 
28   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
29   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
30   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
31   ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
32   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
33   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
34   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
35 
36   *iscopy = PETSC_TRUE;
37   PetscFunctionReturn(0);
38 }
39 EXTERN_C_END
40 
41 extern int MatSetUpMultiply_MPIDense(Mat);
42 
43 #undef __FUNC__
44 #define __FUNC__ "MatSetValues_MPIDense"
45 int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
46 {
47   Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
48   int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
49   int          roworiented = A->roworiented;
50 
51   PetscFunctionBegin;
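  /*
     Entries whose rows are owned locally are inserted directly into the local
     dense block; entries for rows owned by other processors are cached in the
     matrix stash (unless MAT_IGNORE_OFF_PROC_ENTRIES was set) and communicated
     during MatAssemblyBegin()/MatAssemblyEnd().
  */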
52   for ( i=0; i<m; i++ ) {
53     if (idxm[i] < 0) continue;
54     if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
55     if (idxm[i] >= rstart && idxm[i] < rend) {
56       row = idxm[i] - rstart;
57       if (roworiented) {
58         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
59       } else {
60         for ( j=0; j<n; j++ ) {
61           if (idxn[j] < 0) continue;
62           if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
63           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
64         }
65       }
66     } else {
67       if ( !A->donotstash) {
68         if (roworiented) {
69           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
70         } else {
71           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
72         }
73       }
74     }
75   }
76   PetscFunctionReturn(0);
77 }
78 
79 #undef __FUNC__
80 #define __FUNC__ "MatGetValues_MPIDense"
81 int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
82 {
83   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
84   int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;
85 
86   PetscFunctionBegin;
87   for ( i=0; i<m; i++ ) {
88     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
89     if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
90     if (idxm[i] >= rstart && idxm[i] < rend) {
91       row = idxm[i] - rstart;
92       for ( j=0; j<n; j++ ) {
93         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
94         if (idxn[j] >= mdn->N) {
95           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
96         }
97         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
98       }
99     } else {
100       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
101     }
102   }
103   PetscFunctionReturn(0);
104 }
105 
106 #undef __FUNC__
107 #define __FUNC__ "MatGetArray_MPIDense"
108 int MatGetArray_MPIDense(Mat A,Scalar **array)
109 {
110   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
111   int          ierr;
112 
113   PetscFunctionBegin;
114   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
115   PetscFunctionReturn(0);
116 }
117 
118 #undef __FUNC__
119 #define __FUNC__ "MatGetSubMatrix_MPIDense"
120 static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
121 {
122   Mat_MPIDense *mat = (Mat_MPIDense *) A->data, *newmatd;
123   Mat_SeqDense *lmat = (Mat_SeqDense *) mat->A->data;
124   int          i, j, ierr, *irow, *icol, rstart, rend, nrows, ncols, nlrows, nlcols, rank;
125   Scalar       *av, *bv, *v = lmat->v;
126   Mat          newmat;
127 
128   PetscFunctionBegin;
129   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
130   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
131   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
132   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
133   ierr = ISGetSize(iscol,&ncols);CHKERRQ(ierr);
134 
135   /* No parallel redistribution is currently supported! We should really check each index
136      set to confirm that it is OK.  ... Currently only a submatrix with the same row
137      partitioning as the original matrix is supported! */
138 
139   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
140   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
141 
142   /* Check submatrix call */
143   if (scall == MAT_REUSE_MATRIX) {
144     /* SETERRQ(PETSC_ERR_ARG_SIZ,0,"Reused submatrix wrong size"); */
145     /* Really need to test the row and column sizes! */
146     newmat = *B;
147   } else {
148     /* Create and fill new matrix */
149     ierr = MatCreateMPIDense(A->comm,nrows,ncols,PETSC_DECIDE,PETSC_DECIDE,PETSC_NULL,&newmat);CHKERRQ(ierr);
150   }
151 
152   /* Now extract the data pointers and do the copy, column at a time */
153   newmatd = (Mat_MPIDense *) newmat->data;
154   bv = ((Mat_SeqDense *)newmatd->A->data)->v;
155 
156   for ( i=0; i<ncols; i++ ) {
157     av = v + nlrows*icol[i];
158     for (j=0; j<nrows; j++ ) {
159       *bv++ = av[irow[j] - rstart];
160     }
161   }
162 
163   /* Assemble the matrices so that the correct flags are set */
164   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
165   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
166 
167   /* Free work space */
168   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
169   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
170   *B = newmat;
171   PetscFunctionReturn(0);
172 }
173 
174 #undef __FUNC__
175 #define __FUNC__ "MatRestoreArray_MPIDense"
176 int MatRestoreArray_MPIDense(Mat A,Scalar **array)
177 {
178   PetscFunctionBegin;
179   PetscFunctionReturn(0);
180 }
181 
182 #undef __FUNC__
183 #define __FUNC__ "MatAssemblyBegin_MPIDense"
184 int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
185 {
186   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
187   MPI_Comm     comm = mat->comm;
188   int          ierr,nstash,reallocs;
189   InsertMode   addv;
190 
191   PetscFunctionBegin;
192   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
193   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
194   if (addv == (ADD_VALUES|INSERT_VALUES)) {
195     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
196   }
197   mat->insertmode = addv; /* in case this processor had no cache */
198 
199   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
200   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
201   PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",
202            nstash,reallocs);
203   PetscFunctionReturn(0);
204 }
205 
206 #undef __FUNC__
207 #define __FUNC__ "MatAssemblyEnd_MPIDense"
208 int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
209 {
210   Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
211   int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
212   Scalar       *val;
213   InsertMode   addv=mat->insertmode;
214 
215   PetscFunctionBegin;
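  /*
     Receive the off-processor entries that other processors stashed for us,
     group consecutive entries belonging to the same row, and insert each
     group with a single call to MatSetValues_MPIDense().
  */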
216   /*  wait on receives */
217   while (1) {
218     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
219     if (!flg) break;
220 
221     for ( i=0; i<n; ) {
222       /* Now identify the consecutive vals belonging to the same row */
223       for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
224       if (j < n) ncols = j-i;
225       else       ncols = n-i;
226       /* Now assemble all these values with a single function call */
227       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
228       i = j;
229     }
230   }
231   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
232 
233   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
234   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
235 
236   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
237     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
238   }
239   PetscFunctionReturn(0);
240 }
241 
242 #undef __FUNC__
243 #define __FUNC__ "MatZeroEntries_MPIDense"
244 int MatZeroEntries_MPIDense(Mat A)
245 {
246   int          ierr;
247   Mat_MPIDense *l = (Mat_MPIDense *) A->data;
248 
249   PetscFunctionBegin;
250   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
251   PetscFunctionReturn(0);
252 }
253 
254 #undef __FUNC__
255 #define __FUNC__ "MatGetBlockSize_MPIDense"
256 int MatGetBlockSize_MPIDense(Mat A,int *bs)
257 {
258   PetscFunctionBegin;
259   *bs = 1;
260   PetscFunctionReturn(0);
261 }
262 
263 /* The code does not handle the diagonal entries correctly unless the
264    matrix is square and the column and row ownerships are identical.
265    This is a BUG. The only way to fix it seems to be to access
266    mdn->A directly and not go through the MatZeroRows()
267    routine.
268 */
269 #undef __FUNC__
270 #define __FUNC__ "MatZeroRows_MPIDense"
271 int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
272 {
273   Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
274   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
275   int            *procs,*nprocs,j,found,idx,nsends,*work;
276   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
277   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
278   int            *lens,imdex,*lrows,*values;
279   MPI_Comm       comm = A->comm;
280   MPI_Request    *send_waits,*recv_waits;
281   MPI_Status     recv_status,*send_status;
282   IS             istmp;
283 
284   PetscFunctionBegin;
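  /*
     Each processor determines which processor owns every row index in its IS,
     sends those indices to the owners, and collects the indices of its own
     rows that other processors want zeroed.  The local rows are then zeroed
     (and, if diag is provided, the diagonal entries set) by calling
     MatZeroRows() on the local sequential matrix.
  */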
285   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
286   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
287 
288   /*  first count number of contributors to each processor */
289   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
290   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
291   procs  = nprocs + size;
292   owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
293   for ( i=0; i<N; i++ ) {
294     idx = rows[i];
295     found = 0;
296     for ( j=0; j<size; j++ ) {
297       if (idx >= owners[j] && idx < owners[j+1]) {
298         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
299       }
300     }
301     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
302   }
303   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
304 
305   /* inform other processors of number of messages and max length*/
306   work   = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
307   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
308   nrecvs = work[rank];
309   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
310   nmax   = work[rank];
311   ierr = PetscFree(work);CHKERRQ(ierr);
312 
313   /* post receives:   */
314   rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
315   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
316   for ( i=0; i<nrecvs; i++ ) {
317     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
318   }
319 
320   /* do sends:
321       1) starts[i] gives the starting index in svalues for stuff going to
322          the ith processor
323   */
324   svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
325   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
326   starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
327   starts[0]  = 0;
328   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
329   for ( i=0; i<N; i++ ) {
330     svalues[starts[owner[i]]++] = rows[i];
331   }
332   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
333 
334   starts[0] = 0;
335   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
336   count = 0;
337   for ( i=0; i<size; i++ ) {
338     if (procs[i]) {
339       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
340     }
341   }
342   ierr = PetscFree(starts);CHKERRQ(ierr);
343 
344   base = owners[rank];
345 
346   /*  wait on receives */
347   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
348   source = lens + nrecvs;
349   count  = nrecvs; slen = 0;
350   while (count) {
351     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
352     /* unpack receives into our local space */
353     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
354     source[imdex]  = recv_status.MPI_SOURCE;
355     lens[imdex]  = n;
356     slen += n;
357     count--;
358   }
359   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
360 
361   /* move the data into the send scatter */
362   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
363   count = 0;
364   for ( i=0; i<nrecvs; i++ ) {
365     values = rvalues + i*nmax;
366     for ( j=0; j<lens[i]; j++ ) {
367       lrows[count++] = values[j] - base;
368     }
369   }
370   ierr = PetscFree(rvalues);CHKERRQ(ierr);
371   ierr = PetscFree(lens);CHKERRQ(ierr);
372   ierr = PetscFree(owner);CHKERRQ(ierr);
373   ierr = PetscFree(nprocs);CHKERRQ(ierr);
374 
375   /* actually zap the local rows */
376   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
377   PLogObjectParent(A,istmp);
378   ierr = PetscFree(lrows);CHKERRQ(ierr);
379   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
380   ierr = ISDestroy(istmp);CHKERRQ(ierr);
381 
382   /* wait on sends */
383   if (nsends) {
384     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
385     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
386     ierr = PetscFree(send_status);CHKERRQ(ierr);
387   }
388   ierr = PetscFree(send_waits);CHKERRQ(ierr);
389   ierr = PetscFree(svalues);CHKERRQ(ierr);
390 
391   PetscFunctionReturn(0);
392 }
393 
394 #undef __FUNC__
395 #define __FUNC__ "MatMult_MPIDense"
396 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
397 {
398   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
399   int          ierr;
400 
401   PetscFunctionBegin;
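  /*
     Every processor stores all N columns of its local rows, so y = A*x only
     requires gathering the entire vector x into the local work vector lvec
     and then performing a sequential dense multiply.
  */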
402   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
403   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
404   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNC__
409 #define __FUNC__ "MatMultAdd_MPIDense"
410 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
411 {
412   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
413   int          ierr;
414 
415   PetscFunctionBegin;
416   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
417   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
418   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
419   PetscFunctionReturn(0);
420 }
421 
422 #undef __FUNC__
423 #define __FUNC__ "MatMultTrans_MPIDense"
424 int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
425 {
426   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
427   int          ierr;
428   Scalar       zero = 0.0;
429 
430   PetscFunctionBegin;
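  /*
     y = A^T x: each processor multiplies the transpose of its local block by
     its local piece of x, giving a full-length partial result in lvec; the
     partial results are then summed into the distributed y with a reverse
     scatter-add (hence y is zeroed first).
  */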
431   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
432   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
433   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
434   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
435   PetscFunctionReturn(0);
436 }
437 
438 #undef __FUNC__
439 #define __FUNC__ "MatMultTransAdd_MPIDense"
440 int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
441 {
442   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
443   int          ierr;
444 
445   PetscFunctionBegin;
446   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
447   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
448   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
449   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
450   PetscFunctionReturn(0);
451 }
452 
453 #undef __FUNC__
454 #define __FUNC__ "MatGetDiagonal_MPIDense"
455 int MatGetDiagonal_MPIDense(Mat A,Vec v)
456 {
457   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
458   Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
459   int          ierr, len, i, n, m = a->m, radd;
460   Scalar       *x, zero = 0.0;
461 
462   PetscFunctionBegin;
463   ierr = VecSet(&zero,v);CHKERRQ(ierr);
464   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
465   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
466   if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
467   len = PetscMin(aloc->m,aloc->n);
468   radd = a->rstart*m;
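  /* The local block is stored column-major with m local rows, so the diagonal
     entry of global row rstart+i (which lies in global column rstart+i) sits at
     offset (rstart+i)*m + i = radd + i*m + i. */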
469   for ( i=0; i<len; i++ ) {
470     x[i] = aloc->v[radd + i*m + i];
471   }
472   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
473   PetscFunctionReturn(0);
474 }
475 
476 #undef __FUNC__
477 #define __FUNC__ "MatDestroy_MPIDense"
478 int MatDestroy_MPIDense(Mat mat)
479 {
480   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
481   int          ierr;
482 
483   PetscFunctionBegin;
484 
485   if (mat->mapping) {
486     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
487   }
488   if (mat->bmapping) {
489     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
490   }
491 #if defined(PETSC_USE_LOG)
492   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
493 #endif
494   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
495   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
496   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
497   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
498   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
499   if (mdn->factor) {
500     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
501     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
502     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
503     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
504   }
505   ierr = PetscFree(mdn);CHKERRQ(ierr);
506   if (mat->rmap) {
507     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
508   }
509   if (mat->cmap) {
510     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
511   }
512   PLogObjectDestroy(mat);
513   PetscHeaderDestroy(mat);
514   PetscFunctionReturn(0);
515 }
516 
517 #undef __FUNC__
518 #define __FUNC__ "MatView_MPIDense_Binary"
519 static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
520 {
521   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
522   int          ierr;
523 
524   PetscFunctionBegin;
525   if (mdn->size == 1) {
526     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
527   }
528   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
529   PetscFunctionReturn(0);
530 }
531 
532 #undef __FUNC__
533 #define __FUNC__ "MatView_MPIDense_ASCII"
534 static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
535 {
536   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
537   int          ierr, format, size = mdn->size, rank = mdn->rank;
538   FILE         *fd;
539   ViewerType   vtype;
540 
541   PetscFunctionBegin;
542   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
543   ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
544   ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
545   if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
546     MatInfo info;
547     ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
548     PetscSequentialPhaseBegin(mat->comm,1);
549       fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
550          (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
551       fflush(fd);
552     PetscSequentialPhaseEnd(mat->comm,1);
553     ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
554     PetscFunctionReturn(0);
555   } else if (format == VIEWER_FORMAT_ASCII_INFO) {
556     PetscFunctionReturn(0);
557   }
558 
559   if (size == 1) {
560     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
561   } else {
562     /* assemble the entire matrix onto the first processor. */
563     Mat          A;
564     int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
565     Scalar       *vals;
566     Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;
567 
568     if (!rank) {
569       ierr = MatCreateMPIDense(mat->comm,M,mdn->nvec,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
570     } else {
571       ierr = MatCreateMPIDense(mat->comm,0,mdn->nvec,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
572     }
573     PLogObjectParent(mat,A);
574 
575     /* Copy the matrix ... This isn't the most efficient means,
576        but it's quick for now */
577     row = mdn->rstart; m = Amdn->m;
578     for ( i=0; i<m; i++ ) {
579       ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
580       ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
581       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
582       row++;
583     }
584 
585     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
586     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
587     if (!rank) {
588       ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer);CHKERRQ(ierr);
589     }
590     ierr = MatDestroy(A);CHKERRQ(ierr);
591   }
592   PetscFunctionReturn(0);
593 }
594 
595 #undef __FUNC__
596 #define __FUNC__ "MatView_MPIDense"
597 int MatView_MPIDense(Mat mat,Viewer viewer)
598 {
599   int          ierr;
600 
601   PetscFunctionBegin;
602   if (PetscTypeCompare(viewer,ASCII_VIEWER)) {
603     ierr = MatView_MPIDense_ASCII(mat,viewer);CHKERRQ(ierr);
604   } else if (PetscTypeCompare(viewer,BINARY_VIEWER)) {
605     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
606   } else {
607     SETERRQ(PETSC_ERR_SUP,0,"Viewer type not supported by this PETSc object");
608   }
609   PetscFunctionReturn(0);
610 }
611 
612 #undef __FUNC__
613 #define __FUNC__ "MatGetInfo_MPIDense"
614 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
615 {
616   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
617   Mat          mdn = mat->A;
618   int          ierr;
619   double       isend[5], irecv[5];
620 
621   PetscFunctionBegin;
622   info->rows_global    = (double)mat->M;
623   info->columns_global = (double)mat->N;
624   info->rows_local     = (double)mat->m;
625   info->columns_local  = (double)mat->N;
626   info->block_size     = 1.0;
627   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
628   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
629   isend[3] = info->memory;  isend[4] = info->mallocs;
630   if (flag == MAT_LOCAL) {
631     info->nz_used      = isend[0];
632     info->nz_allocated = isend[1];
633     info->nz_unneeded  = isend[2];
634     info->memory       = isend[3];
635     info->mallocs      = isend[4];
636   } else if (flag == MAT_GLOBAL_MAX) {
637     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
638     info->nz_used      = irecv[0];
639     info->nz_allocated = irecv[1];
640     info->nz_unneeded  = irecv[2];
641     info->memory       = irecv[3];
642     info->mallocs      = irecv[4];
643   } else if (flag == MAT_GLOBAL_SUM) {
644     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
645     info->nz_used      = irecv[0];
646     info->nz_allocated = irecv[1];
647     info->nz_unneeded  = irecv[2];
648     info->memory       = irecv[3];
649     info->mallocs      = irecv[4];
650   }
651   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
652   info->fill_ratio_needed = 0;
653   info->factor_mallocs    = 0;
654   PetscFunctionReturn(0);
655 }
656 
657 /* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
658    extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
659    extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
660    extern int MatSolve_MPIDense(Mat,Vec,Vec);
661    extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
662    extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
663    extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */
664 
665 #undef __FUNC__
666 #define __FUNC__ "MatSetOption_MPIDense"
667 int MatSetOption_MPIDense(Mat A,MatOption op)
668 {
669   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
670 
671   PetscFunctionBegin;
672   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
673       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
674       op == MAT_NEW_NONZERO_LOCATION_ERR ||
675       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
676       op == MAT_COLUMNS_SORTED ||
677       op == MAT_COLUMNS_UNSORTED) {
678         MatSetOption(a->A,op);
679   } else if (op == MAT_ROW_ORIENTED) {
680         a->roworiented = 1;
681         MatSetOption(a->A,op);
682   } else if (op == MAT_ROWS_SORTED ||
683              op == MAT_ROWS_UNSORTED ||
684              op == MAT_SYMMETRIC ||
685              op == MAT_STRUCTURALLY_SYMMETRIC ||
686              op == MAT_YES_NEW_DIAGONALS ||
687              op == MAT_USE_HASH_TABLE) {
688     PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
689   } else if (op == MAT_COLUMN_ORIENTED) {
690     a->roworiented = 0; MatSetOption(a->A,op);
691   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
692     a->donotstash = 1;
693   } else if (op == MAT_NO_NEW_DIAGONALS) {
694     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
695   } else {
696     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
697   }
698   PetscFunctionReturn(0);
699 }
700 
701 #undef __FUNC__
702 #define __FUNC__ "MatGetSize_MPIDense"
703 int MatGetSize_MPIDense(Mat A,int *m,int *n)
704 {
705   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
706 
707   PetscFunctionBegin;
708   *m = mat->M; *n = mat->N;
709   PetscFunctionReturn(0);
710 }
711 
712 #undef __FUNC__
713 #define __FUNC__ "MatGetLocalSize_MPIDense"
714 int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
715 {
716   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
717 
718   PetscFunctionBegin;
719   *m = mat->m; *n = mat->N;
720   PetscFunctionReturn(0);
721 }
722 
723 #undef __FUNC__
724 #define __FUNC__ "MatGetOwnershipRange_MPIDense"
725 int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
726 {
727   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
728 
729   PetscFunctionBegin;
730   *m = mat->rstart; *n = mat->rend;
731   PetscFunctionReturn(0);
732 }
733 
734 #undef __FUNC__
735 #define __FUNC__ "MatGetRow_MPIDense"
736 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
737 {
738   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
739   int          lrow, rstart = mat->rstart, rend = mat->rend,ierr;
740 
741   PetscFunctionBegin;
742   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows supported");
743   lrow = row - rstart;
744   ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
745   PetscFunctionReturn(0);
746 }
747 
748 #undef __FUNC__
749 #define __FUNC__ "MatRestoreRow_MPIDense"
750 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
751 {
752   int ierr;
753 
754   PetscFunctionBegin;
755   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
756   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
757   PetscFunctionReturn(0);
758 }
759 
760 #undef __FUNC__
761 #define __FUNC__ "MatDiagonalScale_MPIDense"
762 int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
763 {
764   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
765   Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
766   Scalar       *l,*r,x,*v;
767   int          ierr,i,j,s2a,s3a,s2,s3,m=mat->m,n=mat->n;
768 
769   PetscFunctionBegin;
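  /*
     Left scaling is purely local: local row i is multiplied by ll[i].  Right
     scaling needs all N entries of rr on every processor (each processor
     stores all columns), so rr is first gathered into the work vector lvec.
  */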
770   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
771   if (ll) {
772     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
773     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Left scaling vector non-conforming local size");
774     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
775     for ( i=0; i<m; i++ ) {
776       x = l[i];
777       v = mat->v + i;
778       for ( j=0; j<n; j++ ) { (*v) *= x; v+= m;}
779     }
780     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
781     PLogFlops(n*m);
782   }
783   if (rr) {
784     ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
785     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Right scaling vector non-conforming size");
786     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
787     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
788     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
789     for ( i=0; i<n; i++ ) {
790       x = r[i];
791       v = mat->v + i*m;
792       for ( j=0; j<m; j++ ) { (*v++) *= x;}
793     }
794     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
795     PLogFlops(n*m);
796   }
797   PetscFunctionReturn(0);
798 }
799 
800 #undef __FUNC__
801 #define __FUNC__ "MatNorm_MPIDense"
802 int MatNorm_MPIDense(Mat A,NormType type,double *norm)
803 {
804   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
805   Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
806   int          ierr, i, j;
807   double       sum = 0.0;
808   Scalar       *v = mat->v;
809 
810   PetscFunctionBegin;
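  /*
     Frobenius norm: sum the squares of the local entries and MPI_Allreduce the
     sums.  1-norm: form the local column sums, MPI_Allreduce them, then take
     the maximum over the columns.  Infinity norm: take the maximum of the
     local row norms across all processors.
  */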
811   if (mdn->size == 1) {
812     ierr =  MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
813   } else {
814     if (type == NORM_FROBENIUS) {
815       for (i=0; i<mat->n*mat->m; i++ ) {
816 #if defined(PETSC_USE_COMPLEX)
817         sum += PetscReal(PetscConj(*v)*(*v)); v++;
818 #else
819         sum += (*v)*(*v); v++;
820 #endif
821       }
822       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
823       *norm = sqrt(*norm);
824       PLogFlops(2*mat->n*mat->m);
825     } else if (type == NORM_1) {
826       double *tmp, *tmp2;
827       tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) );CHKPTRQ(tmp);
828       tmp2 = tmp + mdn->N;
829       ierr = PetscMemzero(tmp,2*mdn->N*sizeof(double));CHKERRQ(ierr);
830       *norm = 0.0;
831       v = mat->v;
832       for ( j=0; j<mat->n; j++ ) {
833         for ( i=0; i<mat->m; i++ ) {
834           tmp[j] += PetscAbsScalar(*v);  v++;
835         }
836       }
837       ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
838       for ( j=0; j<mdn->N; j++ ) {
839         if (tmp2[j] > *norm) *norm = tmp2[j];
840       }
841       ierr = PetscFree(tmp);CHKERRQ(ierr);
842       PLogFlops(mat->n*mat->m);
843     } else if (type == NORM_INFINITY) { /* max row norm */
844       double ntemp;
845       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
846       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
847     } else {
848       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
849     }
850   }
851   PetscFunctionReturn(0);
852 }
853 
854 #undef __FUNC__
855 #define __FUNC__ "MatTranspose_MPIDense"
856 int MatTranspose_MPIDense(Mat A,Mat *matout)
857 {
858   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
859   Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
860   Mat          B;
861   int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
862   int          j, i, ierr;
863   Scalar       *v;
864 
865   PetscFunctionBegin;
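  /*
     B = A^T is built by inserting each local column of A (the entries of a
     global column for the locally owned rows) as part of the corresponding
     row of B, then assembling B.  In the "in-place" case the data structures
     of A are freed and B's contents are copied over A's header.
  */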
866   if (matout == PETSC_NULL && M != N) {
867     SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place");
868   }
869   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);
870 
871   m = Aloc->m; n = Aloc->n; v = Aloc->v;
872   rwork = (int *) PetscMalloc((m+1)*sizeof(int));CHKPTRQ(rwork);
873   for ( i=0; i<m; i++ ) rwork[i] = rstart + i;
874   for ( j=0; j<n; j++ ) {
875     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
876     v   += m;
877   }
878   ierr = PetscFree(rwork);CHKERRQ(ierr);
879   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
880   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
881   if (matout != PETSC_NULL) {
882     *matout = B;
883   } else {
884     PetscOps *Abops;
885     MatOps   Aops;
886 
887     /* This isn't really an in-place transpose, but free data struct from a */
888     ierr = PetscFree(a->rowners);CHKERRQ(ierr);
889     ierr = MatDestroy(a->A);CHKERRQ(ierr);
890     if (a->lvec) {ierr = VecDestroy(a->lvec);CHKERRQ(ierr);}
891     if (a->Mvctx) {ierr = VecScatterDestroy(a->Mvctx);CHKERRQ(ierr);}
892     ierr = PetscFree(a);CHKERRQ(ierr);
893 
894     /*
895          This is horrible, horrible code. We need to keep the
896       A pointers for the bops and ops but copy everything
897       else from B.
898     */
899     Abops   = A->bops;
900     Aops    = A->ops;
901     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
902     A->bops = Abops;
903     A->ops  = Aops;
904 
905     PetscHeaderDestroy(B);
906   }
907   PetscFunctionReturn(0);
908 }
909 
910 #include "pinclude/blaslapack.h"
911 #undef __FUNC__
912 #define __FUNC__ "MatScale_MPIDense"
913 int MatScale_MPIDense(Scalar *alpha,Mat inA)
914 {
915   Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
916   Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
917   int          one = 1, nz;
918 
919   PetscFunctionBegin;
920   nz = a->m*a->n;
921   BLscal_( &nz, alpha, a->v, &one );
922   PLogFlops(nz);
923   PetscFunctionReturn(0);
924 }
925 
926 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
927 extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);
928 
929 /* -------------------------------------------------------------------*/
930 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
931        MatGetRow_MPIDense,
932        MatRestoreRow_MPIDense,
933        MatMult_MPIDense,
934        MatMultAdd_MPIDense,
935        MatMultTrans_MPIDense,
936        MatMultTransAdd_MPIDense,
937        0,
938        0,
939        0,
940        0,
941        0,
942        0,
943        0,
944        MatTranspose_MPIDense,
945        MatGetInfo_MPIDense,0,
946        MatGetDiagonal_MPIDense,
947        MatDiagonalScale_MPIDense,
948        MatNorm_MPIDense,
949        MatAssemblyBegin_MPIDense,
950        MatAssemblyEnd_MPIDense,
951        0,
952        MatSetOption_MPIDense,
953        MatZeroEntries_MPIDense,
954        MatZeroRows_MPIDense,
955        0,
956        0,
957        0,
958        0,
959        MatGetSize_MPIDense,
960        MatGetLocalSize_MPIDense,
961        MatGetOwnershipRange_MPIDense,
962        0,
963        0,
964        MatGetArray_MPIDense,
965        MatRestoreArray_MPIDense,
966        MatDuplicate_MPIDense,
967        0,
968        0,
969        0,
970        0,
971        0,
972        MatGetSubMatrices_MPIDense,
973        0,
974        MatGetValues_MPIDense,
975        0,
976        0,
977        MatScale_MPIDense,
978        0,
979        0,
980        0,
981        MatGetBlockSize_MPIDense,
982        0,
983        0,
984        0,
985        0,
986        0,
987        0,
988        0,
989        0,
990        0,
991        MatGetSubMatrix_MPIDense,
992        0,
993        0,
994        MatGetMaps_Petsc};
995 
996 #undef __FUNC__
997 #define __FUNC__ "MatCreateMPIDense"
998 /*@C
999    MatCreateMPIDense - Creates a parallel matrix in dense format.
1000 
1001    Collective on MPI_Comm
1002 
1003    Input Parameters:
1004 +  comm - MPI communicator
1005 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1006 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1007 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1008 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1009 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1010    to control all matrix memory allocation.
1011 
1012    Output Parameter:
1013 .  A - the matrix
1014 
1015    Notes:
1016    The dense format is fully compatible with standard Fortran 77
1017    storage by columns.
1018 
1019    The data input variable is intended primarily for Fortran programmers
1020    who wish to allocate their own matrix memory space.  Most users should
1021    set data=PETSC_NULL.
1022 
1023    The user MUST specify either the local or global matrix dimensions
1024    (possibly both).
1025 
1026    Level: intermediate
1027 
1028 .keywords: matrix, dense, parallel
1029 
1030 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1031 @*/
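/*
   A minimal usage sketch (illustrative only: the dimensions are placeholders,
   the "..." steps are not shown, and error handling is abbreviated):

      Mat A;
      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,10,10,PETSC_NULL,&A);CHKERRQ(ierr);
      ... insert entries with MatSetValues() ...
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ... use A, e.g. with MatMult() ...
      ierr = MatDestroy(A);CHKERRQ(ierr);
*/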
1032 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
1033 {
1034   Mat          mat;
1035   Mat_MPIDense *a;
1036   int          ierr, i,flg;
1037 
1038   PetscFunctionBegin;
1039   /* Note:  For now, when data is specified above, this assumes the user correctly
1040    allocates the local dense storage space.  We should add error checking. */
1041 
1042   *A = 0;
1043   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView);
1044   PLogObjectCreate(mat);
1045   mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
1046   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1047   mat->ops->destroy = MatDestroy_MPIDense;
1048   mat->ops->view    = MatView_MPIDense;
1049   mat->factor       = 0;
1050   mat->mapping      = 0;
1051 
1052   a->factor       = 0;
1053   mat->insertmode = NOT_SET_VALUES;
1054   ierr = MPI_Comm_rank(comm,&a->rank);CHKERRQ(ierr);
1055   ierr = MPI_Comm_size(comm,&a->size);CHKERRQ(ierr);
1056 
1057   ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
1058 
1059   ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1060   a->nvec = n;
1061 
1062   /* each row stores all columns */
1063   a->N = mat->N = N;
1064   a->M = mat->M = M;
1065   a->m = mat->m = m;
1066   a->n = mat->n = N;   /* NOTE: n == N */
1067 
1068   /* the information in the maps duplicates the information computed below, eventually
1069      we should remove the duplicate information that is not contained in the maps */
1070   ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr);
1071   ierr = MapCreateMPI(comm,n,N,&mat->cmap);CHKERRQ(ierr);
1072 
1073   /* build local table of row and column ownerships */
1074   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
1075   a->cowners = a->rowners + a->size + 1;
1076   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1077   ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1078   a->rowners[0] = 0;
1079   for ( i=2; i<=a->size; i++ ) {
1080     a->rowners[i] += a->rowners[i-1];
1081   }
1082   a->rstart = a->rowners[a->rank];
1083   a->rend   = a->rowners[a->rank+1];
1084   ierr      = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1085   a->cowners[0] = 0;
1086   for ( i=2; i<=a->size; i++ ) {
1087     a->cowners[i] += a->cowners[i-1];
1088   }
1089 
1090   ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A);CHKERRQ(ierr);
1091   PLogObjectParent(mat,a->A);
1092 
1093   /* build cache for off array entries formed */
1094   a->donotstash = 0;
1095   ierr = MatStashCreate_Private(comm,1,&mat->stash);CHKERRQ(ierr);
1096 
1097   /* stuff used for matrix vector multiply */
1098   a->lvec        = 0;
1099   a->Mvctx       = 0;
1100   a->roworiented = 1;
1101 
1102   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",
1103                                      "MatGetDiagonalBlock_MPIDense",
1104                                      (void*)MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1105 
1106   *A = mat;
1107   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
1108   if (flg) {
1109     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
1110   }
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 #undef __FUNC__
1115 #define __FUNC__ "MatDuplicate_MPIDense"
1116 static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1117 {
1118   Mat          mat;
1119   Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
1120   int          ierr;
1121   FactorCtx    *factor;
1122 
1123   PetscFunctionBegin;
1124   *newmat       = 0;
1125   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView);
1126   PLogObjectCreate(mat);
1127   mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
1128   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1129   mat->ops->destroy = MatDestroy_MPIDense;
1130   mat->ops->view    = MatView_MPIDense;
1131   mat->factor       = A->factor;
1132   mat->assembled    = PETSC_TRUE;
1133 
1134   a->m = mat->m = oldmat->m;
1135   a->n = mat->n = oldmat->n;
1136   a->M = mat->M = oldmat->M;
1137   a->N = mat->N = oldmat->N;
1138   if (oldmat->factor) {
1139     a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx));CHKPTRQ(factor);
1140     /* copy factor contents ... add this code! */
1141   } else a->factor = 0;
1142 
1143   a->rstart       = oldmat->rstart;
1144   a->rend         = oldmat->rend;
1145   a->size         = oldmat->size;
1146   a->rank         = oldmat->rank;
1147   mat->insertmode = NOT_SET_VALUES;
1148   a->donotstash   = oldmat->donotstash;
1149   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners);
1150   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1151   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
1152   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1153 
1154   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1155   PLogObjectParent(mat,a->lvec);
1156   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1157   PLogObjectParent(mat,a->Mvctx);
1158   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1159   PLogObjectParent(mat,a->A);
1160   *newmat = mat;
1161   PetscFunctionReturn(0);
1162 }
1163 
1164 #include "sys.h"
1165 
1166 #undef __FUNC__
1167 #define __FUNC__ "MatLoad_MPIDense_DenseInFile"
1168 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
1169 {
1170   int        *rowners, i,size,rank,m,ierr,nz,j;
1171   Scalar     *array,*vals,*vals_ptr;
1172   MPI_Status status;
1173 
1174   PetscFunctionBegin;
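  /*
     The file stores the matrix values as one dense block, row by row.
     Process 0 reads each processor's contiguous block of rows and ships it
     off; every processor then copies its block, transposing from the
     row-major file ordering into the column-major local storage obtained
     from MatGetArray().
  */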
1175   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1176   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1177 
1178   /* determine ownership of all rows */
1179   m          = M/size + ((M % size) > rank);
1180   rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1181   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1182   rowners[0] = 0;
1183   for ( i=2; i<=size; i++ ) {
1184     rowners[i] += rowners[i-1];
1185   }
1186 
1187   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1188   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1189 
1190   if (!rank) {
1191     vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);
1192 
1193     /* read in my part of the matrix numerical values  */
1194     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1195 
1196     /* insert into the matrix by row (this is why we cannot read directly into the array) */
1197     vals_ptr = vals;
1198     for ( i=0; i<m; i++ ) {
1199       for ( j=0; j<N; j++ ) {
1200         array[i + j*m] = *vals_ptr++;
1201       }
1202     }
1203 
1204     /* read in the other processors' parts and ship them out */
1205     for ( i=1; i<size; i++ ) {
1206       nz   = (rowners[i+1] - rowners[i])*N;
1207       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1208       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1209     }
1210   } else {
1211     /* receive numeric values */
1212     vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);
1213 
1214     /* receive message of values*/
1215     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1216 
1217     /* insert into the matrix by row (this is why we cannot read directly into the array) */
1218     vals_ptr = vals;
1219     for ( i=0; i<m; i++ ) {
1220       for ( j=0; j<N; j++ ) {
1221         array[i + j*m] = *vals_ptr++;
1222       }
1223     }
1224   }
1225   ierr = PetscFree(rowners);CHKERRQ(ierr);
1226   ierr = PetscFree(vals);CHKERRQ(ierr);
1227   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1228   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1229   PetscFunctionReturn(0);
1230 }
1231 
1232 
1233 #undef __FUNC__
1234 #define __FUNC__ "MatLoad_MPIDense"
1235 int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
1236 {
1237   Mat          A;
1238   Scalar       *vals,*svals;
1239   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1240   MPI_Status   status;
1241   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1242   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1243   int          tag = ((PetscObject)viewer)->tag;
1244   int          i, nz, ierr, j,rstart, rend, fd;
1245 
1246   PetscFunctionBegin;
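  /*
     The PETSc binary file begins with a four entry header (class cookie,
     number of rows, number of columns, number of nonzeros).  A nonzero count
     of MATRIX_BINARY_FORMAT_DENSE flags a dense on-disk layout; otherwise the
     file continues with row lengths, column indices, and values in the usual
     sparse binary format, which process 0 reads and distributes.
  */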
1247   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1248   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1249   if (!rank) {
1250     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1251     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1252     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1253   }
1254 
1255   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1256   M = header[1]; N = header[2]; nz = header[3];
1257 
1258   /*
1259        Handle case where matrix is stored on disk as a dense matrix
1260   */
1261   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1262     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1263     PetscFunctionReturn(0);
1264   }
1265 
1266   /* determine ownership of all rows */
1267   m          = M/size + ((M % size) > rank);
1268   rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1269   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1270   rowners[0] = 0;
1271   for ( i=2; i<=size; i++ ) {
1272     rowners[i] += rowners[i-1];
1273   }
1274   rstart = rowners[rank];
1275   rend   = rowners[rank+1];
1276 
1277   /* distribute row lengths to all processors */
1278   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
1279   offlens = ourlens + (rend-rstart);
1280   if (!rank) {
1281     rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
1282     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1283     sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
1284     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1285     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1286     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1287   } else {
1288     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
1289   }
1290 
1291   if (!rank) {
1292     /* calculate the number of nonzeros on each processor */
1293     procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
1294     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1295     for ( i=0; i<size; i++ ) {
1296       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1297         procsnz[i] += rowlengths[j];
1298       }
1299     }
1300     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1301 
1302     /* determine max buffer needed and allocate it */
1303     maxnz = 0;
1304     for ( i=0; i<size; i++ ) {
1305       maxnz = PetscMax(maxnz,procsnz[i]);
1306     }
1307     cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
1308 
1309     /* read in my part of the matrix column indices  */
1310     nz = procsnz[0];
1311     mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
1312     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1313 
1314     /* read in everyone else's part and ship it off */
1315     for ( i=1; i<size; i++ ) {
1316       nz   = procsnz[i];
1317       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1318       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1319     }
1320     ierr = PetscFree(cols);CHKERRQ(ierr);
1321   } else {
1322     /* determine buffer space needed for message */
1323     nz = 0;
1324     for ( i=0; i<m; i++ ) {
1325       nz += ourlens[i];
1326     }
1327     mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
1328 
1329     /* receive message of column indices*/
1330     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1331     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1332     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with the file");
1333   }
1334 
1335   /* loop over local rows, determining number of off diagonal entries */
1336   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1337   jj = 0;
1338   for ( i=0; i<m; i++ ) {
1339     for ( j=0; j<ourlens[i]; j++ ) {
1340       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1341       jj++;
1342     }
1343   }
1344 
1345   /* create our matrix */
1346   for ( i=0; i<m; i++ ) {
1347     ourlens[i] -= offlens[i];
1348   }
1349   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1350   A = *newmat;
1351   for ( i=0; i<m; i++ ) {
1352     ourlens[i] += offlens[i];
1353   }
1354 
1355   if (!rank) {
1356     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);
1357 
1358     /* read in my part of the matrix numerical values  */
1359     nz = procsnz[0];
1360     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1361 
1362     /* insert into matrix */
1363     jj      = rstart;
1364     smycols = mycols;
1365     svals   = vals;
1366     for ( i=0; i<m; i++ ) {
1367       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1368       smycols += ourlens[i];
1369       svals   += ourlens[i];
1370       jj++;
1371     }
1372 
1373     /* read in the other processors' parts and ship them out */
1374     for ( i=1; i<size; i++ ) {
1375       nz   = procsnz[i];
1376       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1377       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1378     }
1379     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1380   } else {
1381     /* receive numeric values */
1382     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);
1383 
1384     /* receive message of values*/
1385     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1386     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1387     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with the file");
1388 
1389     /* insert into matrix */
1390     jj      = rstart;
1391     smycols = mycols;
1392     svals   = vals;
1393     for ( i=0; i<m; i++ ) {
1394       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1395       smycols += ourlens[i];
1396       svals   += ourlens[i];
1397       jj++;
1398     }
1399   }
1400   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1401   ierr = PetscFree(vals);CHKERRQ(ierr);
1402   ierr = PetscFree(mycols);CHKERRQ(ierr);
1403   ierr = PetscFree(rowners);CHKERRQ(ierr);
1404 
1405   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1406   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1407   PetscFunctionReturn(0);
1408 }
1409 