#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpidense.c,v 1.122 1999/08/03 18:32:52 curfman Exp balay $";
#endif

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatGetDiagonalBlock_MPIDense"
int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  int          m = mdn->m,rstart = mdn->rstart,rank,ierr;
  Scalar       *array;
  MPI_Comm     comm;

  PetscFunctionBegin;
  if (mdn->M != mdn->N) SETERRQ(PETSC_ERR_SUP,0,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END
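/*
   Usage sketch (illustration only; assumes the generic MatGetDiagonalBlock()
   dispatcher of this PETSc version, which reaches the routine above through the
   "MatGetDiagonalBlock_C" function composed in MatCreateMPIDense()):

     Mat        B;
     PetscTruth iscopy;
     ierr = MatGetDiagonalBlock(A,&iscopy,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);

   The pointer array+m*rstart above works because the local array is stored
   column-major with leading dimension m, so the diagonal block begins at
   column rstart.
*/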

extern int MatSetUpMultiply_MPIDense(Mat);

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
  int          roworiented = A->roworiented;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for ( j=0; j<n; j++ ) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if ( !A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
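/*
   Usage sketch (illustration only): values are set with global indices; entries
   for rows owned by another process are cached in mat->stash until
   MatAssemblyBegin()/MatAssemblyEnd() ship them to their owners.

     int    rows[2] = {0,1}, cols[2] = {0,1};      hypothetical indices
     Scalar vals[4] = {1.0,2.0,3.0,4.0};           hypothetical data
     ierr = MatSetValues(mat,2,rows,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/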

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= mdn->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIDense"
static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data, *newmatd;
  Mat_SeqDense *lmat = (Mat_SeqDense *) mat->A->data;
  int          i, j, ierr, *irow, *icol, rstart, rend, nrows, ncols, nlrows, nlcols, rank;
  Scalar       *av, *bv, *v = lmat->v;
  Mat          newmat;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution is currently supported! We should really check
     each index set to confirm that it is OK; currently the submatrix must have
     the same row partitioning as the original matrix. */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,0,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreateMPIDense(A->comm,nrows,ncols,PETSC_DECIDE,PETSC_DECIDE,PETSC_NULL,&newmat);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *) newmat->data;
  bv = ((Mat_SeqDense *)newmatd->A->data)->v;

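  /* The local arrays are stored column-major: column icol[i] of the original
     local block starts at v + nlrows*icol[i]; irow[] holds global row indices,
     so rstart is subtracted to address the local row. */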
  for ( i=0; i<ncols; i++ ) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++ ) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",
           nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  Scalar       *val;
  InsertMode   addv=mat->insertmode;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for ( i=0; i<n; ) {
      /* Now identify the consecutive vals belonging to the same row */
      for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

/* The code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A directly rather than going through the MatZeroRows()
   routine.
*/
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work   = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
  ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  ierr = PetscFree(work);CHKERRQ(ierr);

  /* post receives:   */
  rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
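/*
   Usage sketch (illustration only; note the diagonal-entry caveat in the comment
   above this routine): zero a set of global rows, placing 2.0 on the diagonal of
   each zeroed row (pass PETSC_NULL to leave the diagonal zero as well).

     IS     is;
     int    rows[2] = {0,5};                     hypothetical row numbers
     Scalar two = 2.0;
     ierr = ISCreateGeneral(PETSC_COMM_WORLD,2,rows,&is);CHKERRQ(ierr);
     ierr = MatZeroRows(A,is,&two);CHKERRQ(ierr);
     ierr = ISDestroy(is);CHKERRQ(ierr);
*/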

#undef __FUNC__
#define __FUNC__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
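  /* Each process owns complete rows of the matrix, so the local product needs
     every entry of x; gather them all into the local work vector lvec first. */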
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIDense"
int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;
  Scalar       zero = 0.0;

  PetscFunctionBegin;
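  /* The local block contributes A_local^T * x_local, a partial sum for every
     global entry of y; compute it into lvec and add-scatter back into yy. */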
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIDense"
int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
  int          ierr, len, i, n, m = a->m, radd;
  Scalar       *x, zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
  len = PetscMin(aloc->m,aloc->n);
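  /* The local block is column-major with leading dimension m; the global diagonal
     entry (rstart+i,rstart+i) lives at local row i, column rstart+i, i.e. at
     v[rstart*m + i*m + i], hence the offset radd = rstart*m below. */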
  radd = a->rstart*m;
  for ( i=0; i<len; i++ ) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)   VecDestroy(mdn->lvec);
  if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  }
  else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_ASCII"
static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format, size = mdn->size, rank = mdn->rank;
  FILE         *fd;
  ViewerType   vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
  ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
  if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
    MatInfo info;
    ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
      fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
         (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
    ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (format == VIEWER_FORMAT_ASCII_INFO) {
    PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
    Scalar       *vals;
    Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = Amdn->m;
    for ( i=0; i<m; i++ ) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer);CHKERRQ(ierr);
    }
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,Viewer viewer)
{
  int          ierr;
  ViewerType   vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
    ierr = MatView_MPIDense_ASCII(mat,viewer);CHKERRQ(ierr);
  } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ(1,1,"Viewer type not supported by PETSc object");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  Mat          mdn = mat->A;
  int          ierr;
  double       isend[5], irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
   extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
   extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
   extern int MatSolve_MPIDense(Mat,Vec,Vec);
   extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
   extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
   extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_NEW_NONZERO_LOCATION_ERR ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_COLUMNS_UNSORTED) {
        MatSetOption(a->A,op);
  } else if (op == MAT_ROW_ORIENTED) {
        a->roworiented = 1;
        MatSetOption(a->A,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS ||
             op == MAT_USE_HASH_TABLE) {
    PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0; MatSetOption(a->A,op);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIDense"
int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->M; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIDense"
int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIDense"
int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->rstart; *n = mat->rend;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  int          lrow, rstart = mat->rstart, rend = mat->rend,ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  int ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIDense"
int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  Scalar       *l,*r,x,*v;
  int          ierr,i,j,s2a,s3a,s2,s3,m=mat->m,n=mat->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Left scaling vector non-conforming local size");
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
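    /* v is column-major with leading dimension m, so the entries of local row i
       are v[i], v[i+m], v[i+2m], ...; scale each of them by l[i]. */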
    for ( i=0; i<m; i++ ) {
      x = l[i];
      v = mat->v + i;
      for ( j=0; j<n; j++ ) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    PLogFlops(n*m);
  }
  if (rr) {
    ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Right scaling vec non-conforming local size");
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for ( i=0; i<n; i++ ) {
      x = r[i];
      v = mat->v + i*m;
      for ( j=0; j<m; j++ ) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    PLogFlops(n*m);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr, i, j;
  double       sum = 0.0;
  Scalar       *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr =  MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mat->n*mat->m; i++ ) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    } else if (type == NORM_1) {
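      /* 1-norm = max column sum: accumulate local column sums in tmp[], sum them
         across all processes into tmp2[], then take the maximum over columns. */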
      double *tmp, *tmp2;
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) );CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      ierr = PetscMemzero(tmp,2*mdn->N*sizeof(double));CHKERRQ(ierr);
      *norm = 0.0;
      v = mat->v;
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      PLogFlops(mat->n*mat->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
  Mat          B;
  int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
  int          j, i, ierr;
  Scalar       *v;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_SUP,0,"In-place transpose supported only for square matrices");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = Aloc->m; n = Aloc->n; v = Aloc->v;
  rwork = (int *) PetscMalloc(m*sizeof(int));CHKPTRQ(rwork); /* m entries are filled below */
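  /* Local column j of A (m contiguous entries of v, global rows rstart..rstart+m-1)
     becomes part of row j of B; rwork holds those global row numbers, which serve
     as the column indices in B. */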
  for ( j=0; j<n; j++ ) {
    for (i=0; i<m; i++) rwork[i] = rstart + i;
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps *Abops;
    MatOps   Aops;

    /* This isn't really an in-place transpose; we free the data structures
       of a and copy B's contents over A. */
    ierr = PetscFree(a->rowners);CHKERRQ(ierr);
    ierr = MatDestroy(a->A);CHKERRQ(ierr);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    ierr = PetscFree(a);CHKERRQ(ierr);

    /*
         This is horrible, horrible code. We need to keep the
      A pointers for the bops and ops but copy everything
      else from B.
    */
    Abops   = A->bops;
    Aops    = A->ops;
    ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
    A->bops = Abops;
    A->ops  = Aops;

    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#include "pinclude/blaslapack.h"
#undef __FUNC__
#define __FUNC__ "MatScale_MPIDense"
int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
  Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
  int          one = 1, nz;

  PetscFunctionBegin;
  nz = a->m*a->n;
  BLscal_( &nz, alpha, a->v, &one );
  PLogFlops(nz);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
       MatMultAdd_MPIDense,
       MatMultTrans_MPIDense,
       MatMultTransAdd_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
       MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
       MatGetSize_MPIDense,
       MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
       MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
       0,
       MatScale_MPIDense,
       0,
       0,
       0,
       MatGetBlockSize_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrix_MPIDense,
       0,
       0,
       MatGetMaps_Petsc};

#undef __FUNC__
#define __FUNC__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Currently, the only parallel dense matrix decomposition is by rows,
   so that n=N and each submatrix owns all of the global columns.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i,flg;

  PetscFunctionBegin;
  /* Note:  For now, when data is specified above, this assumes the user correctly
   allocates the local dense storage space.  We should add error checking. */

  *A = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = 0;
  mat->mapping      = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);

  /*
     The computation of n below is wrong; n should represent the number of
     local entries of a right-hand (column) vector, not the number of local columns.
  */

  /* each row stores all columns */
  if (N == PETSC_DECIDE) N = n;
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  /*  if (n != N) SETERRQ(PETSC_ERR_SUP,0,"For now, only n=N is supported"); */
  a->N = mat->N = N;
  a->M = mat->M = M;
  a->m = mat->m = m;
  a->n = mat->n = n;

  /* the information in the maps duplicates the information computed below; eventually
     we should remove the duplicated information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,PETSC_DECIDE,N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr      = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }

  ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off array entries formed */
  a->donotstash = 0;
  ierr = MatStashCreate_Private(comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = 1;

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIDense",
                                     (void*)MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);

  *A = mat;
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
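/*
   Usage sketch (illustration only): create a 10 x 10 parallel dense matrix,
   letting PETSc choose the local row distribution and allocate the storage.

     Mat A;
     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
                              10,10,PETSC_NULL,&A);CHKERRQ(ierr);
*/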

#undef __FUNC__
#define __FUNC__ "MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
  int          ierr;
  FactorCtx    *factor;

  PetscFunctionBegin;
  *newmat       = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx));CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;
  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}
#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
{
  int        *rowners, i,size,rank,m,ierr,nz,j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values  */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly
       into array, which is stored by columns) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values*/
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly
       into array, which is stored by columns) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}


#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense"
int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;
  int          i, nz, ierr, j,rstart, rend, fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
    ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
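
/*
   Usage sketch (illustration only; creating the binary viewer varies by PETSc
   version and is elided here): load an MPIDense matrix from a PETSc binary file
   through the generic dispatcher, which invokes MatLoad_MPIDense() above.

     Mat A;
     ierr = MatLoad(viewer,MATMPIDENSE,&A);CHKERRQ(ierr);
*/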