xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 6831982abb6453c2f3e25efecb5d0951d333371e)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpidense.c,v 1.128 1999/10/13 20:37:17 bsmith Exp bsmith $";
3 #endif
4 
5 /*
6    Basic functions for parallel dense matrices.
7 */
8 
9 #include "src/mat/impls/dense/mpi/mpidense.h"
10 #include "src/vec/vecimpl.h"
11 
12 EXTERN_C_BEGIN
13 #undef __FUNC__
14 #define __FUNC__ "MatGetDiagonalBlock_MPIDense"
15 int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
16 {
17   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
18   int          m = mdn->m,rstart = mdn->rstart,rank,ierr;
19   Scalar       *array;
20   MPI_Comm     comm;
21 
22   PetscFunctionBegin;
23   if (mdn->M != mdn->N) SETERRQ(PETSC_ERR_SUP,0,"Only square matrices supported.");
24 
25   /* The reuse aspect is not implemented efficiently */
26   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
27 
28   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
29   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
30   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
31   ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
32   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
33   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
34   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
35 
36   *iscopy = PETSC_TRUE;
37   PetscFunctionReturn(0);
38 }
39 EXTERN_C_END
40 
41 extern int MatSetUpMultiply_MPIDense(Mat);
42 
43 #undef __FUNC__
44 #define __FUNC__ "MatSetValues_MPIDense"
45 int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
46 {
47   Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
48   int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
49   int          roworiented = A->roworiented;
50 
51   PetscFunctionBegin;
52   for ( i=0; i<m; i++ ) {
53     if (idxm[i] < 0) continue;
54     if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
55     if (idxm[i] >= rstart && idxm[i] < rend) {
56       row = idxm[i] - rstart;
57       if (roworiented) {
58         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
59       } else {
60         for ( j=0; j<n; j++ ) {
61           if (idxn[j] < 0) continue;
62           if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
63           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
64         }
65       }
66     } else {
67       if ( !A->donotstash) {
68         if (roworiented) {
69           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
70         } else {
71           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
72         }
73       }
74     }
75   }
76   PetscFunctionReturn(0);
77 }
78 
79 #undef __FUNC__
80 #define __FUNC__ "MatGetValues_MPIDense"
81 int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
82 {
83   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
84   int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;
85 
86   PetscFunctionBegin;
87   for ( i=0; i<m; i++ ) {
88     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
89     if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
90     if (idxm[i] >= rstart && idxm[i] < rend) {
91       row = idxm[i] - rstart;
92       for ( j=0; j<n; j++ ) {
93         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
94         if (idxn[j] >= mdn->N) {
95           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
96         }
97         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
98       }
99     } else {
100       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
101     }
102   }
103   PetscFunctionReturn(0);
104 }
105 
106 #undef __FUNC__
107 #define __FUNC__ "MatGetArray_MPIDense"
108 int MatGetArray_MPIDense(Mat A,Scalar **array)
109 {
110   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
111   int          ierr;
112 
113   PetscFunctionBegin;
114   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
115   PetscFunctionReturn(0);
116 }
117 
118 #undef __FUNC__
119 #define __FUNC__ "MatGetSubMatrix_MPIDense"
120 static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
121 {
122   Mat_MPIDense *mat = (Mat_MPIDense *) A->data, *newmatd;
123   Mat_SeqDense *lmat = (Mat_SeqDense *) mat->A->data;
124   int          i, j, ierr, *irow, *icol, rstart, rend, nrows, ncols, nlrows, nlcols, rank;
125   Scalar       *av, *bv, *v = lmat->v;
126   Mat          newmat;
127 
128   PetscFunctionBegin;
129   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
130   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
131   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
132   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
133   ierr = ISGetSize(iscol,&ncols);CHKERRQ(ierr);
134 
135   /* No parallel redistribution is currently supported! We should really check each index set
136      to confirm that it is OK.  Currently this supports only a submatrix with the same
137      partitioning as the original matrix! */
138 
139   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
140   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
141 
142   /* Check submatrix call */
143   if (scall == MAT_REUSE_MATRIX) {
144     /* SETERRQ(PETSC_ERR_ARG_SIZ,0,"Reused submatrix wrong size"); */
145     /* Really need to test rows and column sizes! */
146     newmat = *B;
147   } else {
148     /* Create and fill new matrix */
149     ierr = MatCreateMPIDense(A->comm,nrows,ncols,PETSC_DECIDE,PETSC_DECIDE,PETSC_NULL,&newmat);CHKERRQ(ierr);
150   }
151 
152   /* Now extract the data pointers and do the copy, column at a time */
153   newmatd = (Mat_MPIDense *) newmat->data;
154   bv = ((Mat_SeqDense *)newmatd->A->data)->v;
155 
156   for ( i=0; i<ncols; i++ ) {
157     av = v + nlrows*icol[i];
158     for (j=0; j<nrows; j++ ) {
159       *bv++ = av[irow[j] - rstart];
160     }
161   }
162 
163   /* Assemble the matrices so that the correct flags are set */
164   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
165   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
166 
167   /* Free work space */
168   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
169   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
170   *B = newmat;
171   PetscFunctionReturn(0);
172 }
173 
174 #undef __FUNC__
175 #define __FUNC__ "MatRestoreArray_MPIDense"
176 int MatRestoreArray_MPIDense(Mat A,Scalar **array)
177 {
178   PetscFunctionBegin;
179   PetscFunctionReturn(0);
180 }
181 
182 #undef __FUNC__
183 #define __FUNC__ "MatAssemblyBegin_MPIDense"
184 int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
185 {
186   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
187   MPI_Comm     comm = mat->comm;
188   int          ierr,nstash,reallocs;
189   InsertMode   addv;
190 
191   PetscFunctionBegin;
192   /* make sure all processors are in the same insert mode: either all INSERT_VALUES or all ADD_VALUES */
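  /* (this relies on INSERT_VALUES and ADD_VALUES being distinct bit values: the
     bitwise OR over all processes equals ADD_VALUES|INSERT_VALUES exactly when
     some processes used ADD_VALUES while others used INSERT_VALUES) */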
193   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
194   if (addv == (ADD_VALUES|INSERT_VALUES)) {
195     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
196   }
197   mat->insertmode = addv; /* in case this processor had no cache */
198 
199   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
200   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
201   PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",
202            nstash,reallocs);
203   PetscFunctionReturn(0);
204 }
205 
206 #undef __FUNC__
207 #define __FUNC__ "MatAssemblyEnd_MPIDense"
208 int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
209 {
210   Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
211   int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
212   Scalar       *val;
213   InsertMode   addv=mat->insertmode;
214 
215   PetscFunctionBegin;
216   /*  wait on receives */
217   while (1) {
218     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
219     if (!flg) break;
220 
221     for ( i=0; i<n; ) {
222       /* Now identify the consecutive vals belonging to the same row */
223       for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
224       if (j < n) ncols = j-i;
225       else       ncols = n-i;
226       /* Now assemble all these values with a single function call */
227       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
228       i = j;
229     }
230   }
231   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
232 
233   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
234   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
235 
236   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
237     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
238   }
239   PetscFunctionReturn(0);
240 }
241 
242 #undef __FUNC__
243 #define __FUNC__ "MatZeroEntries_MPIDense"
244 int MatZeroEntries_MPIDense(Mat A)
245 {
246   int          ierr;
247   Mat_MPIDense *l = (Mat_MPIDense *) A->data;
248 
249   PetscFunctionBegin;
250   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
251   PetscFunctionReturn(0);
252 }
253 
254 #undef __FUNC__
255 #define __FUNC__ "MatGetBlockSize_MPIDense"
256 int MatGetBlockSize_MPIDense(Mat A,int *bs)
257 {
258   PetscFunctionBegin;
259   *bs = 1;
260   PetscFunctionReturn(0);
261 }
262 
263 /* this code does not handle the diagonal entries correctly unless the
264    matrix is square and the column and row ownerships are identical.
265    This is a BUG. The only way to fix it seems to be to access
266    mdn->A and mdn->B directly and not through the MatZeroRows()
267    routine.
268 */
269 #undef __FUNC__
270 #define __FUNC__ "MatZeroRows_MPIDense"
271 int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
272 {
273   Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
274   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
275   int            *procs,*nprocs,j,found,idx,nsends,*work;
276   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
277   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
278   int            *lens,imdex,*lrows,*values;
279   MPI_Comm       comm = A->comm;
280   MPI_Request    *send_waits,*recv_waits;
281   MPI_Status     recv_status,*send_status;
282   IS             istmp;
283 
284   PetscFunctionBegin;
285   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
286   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
287 
288   /*  first count number of contributors to each processor */
289   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
290   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
291   procs  = nprocs + size;
292   owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
293   for ( i=0; i<N; i++ ) {
294     idx = rows[i];
295     found = 0;
296     for ( j=0; j<size; j++ ) {
297       if (idx >= owners[j] && idx < owners[j+1]) {
298         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
299       }
300     }
301     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
302   }
303   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
304 
305   /* inform other processors of number of messages and max length*/
306   work   = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(work);
307   ierr   = MPI_Allreduce( nprocs, work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
308   nmax   = work[rank];
309   nrecvs = work[size+rank];
310   ierr   = PetscFree(work);CHKERRQ(ierr);
311 
312   /* post receives:   */
313   rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
314   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
315   for ( i=0; i<nrecvs; i++ ) {
316     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
317   }
318 
319   /* do sends:
320       1) starts[i] gives the starting index in svalues for stuff going to
321          the ith processor
322   */
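  /* for example, with three processes and nprocs = {2,0,3} (hypothetical
     counts), starts becomes {0,2,2}: rows destined for process 0 are packed
     into svalues[0..1] and rows destined for process 2 into svalues[2..4] */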
323   svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
324   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
325   starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
326   starts[0]  = 0;
327   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
328   for ( i=0; i<N; i++ ) {
329     svalues[starts[owner[i]]++] = rows[i];
330   }
331   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
332 
333   starts[0] = 0;
334   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
335   count = 0;
336   for ( i=0; i<size; i++ ) {
337     if (procs[i]) {
338       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
339     }
340   }
341   ierr = PetscFree(starts);CHKERRQ(ierr);
342 
343   base = owners[rank];
344 
345   /*  wait on receives */
346   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
347   source = lens + nrecvs;
348   count  = nrecvs; slen = 0;
349   while (count) {
350     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
351     /* unpack receives into our local space */
352     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
353     source[imdex]  = recv_status.MPI_SOURCE;
354     lens[imdex]  = n;
355     slen += n;
356     count--;
357   }
358   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
359 
360   /* move the data into the send scatter */
361   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
362   count = 0;
363   for ( i=0; i<nrecvs; i++ ) {
364     values = rvalues + i*nmax;
365     for ( j=0; j<lens[i]; j++ ) {
366       lrows[count++] = values[j] - base;
367     }
368   }
369   ierr = PetscFree(rvalues);CHKERRQ(ierr);
370   ierr = PetscFree(lens);CHKERRQ(ierr);
371   ierr = PetscFree(owner);CHKERRQ(ierr);
372   ierr = PetscFree(nprocs);CHKERRQ(ierr);
373 
374   /* actually zap the local rows */
375   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
376   PLogObjectParent(A,istmp);
377   ierr = PetscFree(lrows);CHKERRQ(ierr);
378   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
379   ierr = ISDestroy(istmp);CHKERRQ(ierr);
380 
381   /* wait on sends */
382   if (nsends) {
383     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
384     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
385     ierr = PetscFree(send_status);CHKERRQ(ierr);
386   }
387   ierr = PetscFree(send_waits);CHKERRQ(ierr);
388   ierr = PetscFree(svalues);CHKERRQ(ierr);
389 
390   PetscFunctionReturn(0);
391 }
392 
393 #undef __FUNC__
394 #define __FUNC__ "MatMult_MPIDense"
395 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
396 {
397   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
398   int          ierr;
399 
400   PetscFunctionBegin;
401   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
402   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
403   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
404   PetscFunctionReturn(0);
405 }
406 
407 #undef __FUNC__
408 #define __FUNC__ "MatMultAdd_MPIDense"
409 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
410 {
411   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
412   int          ierr;
413 
414   PetscFunctionBegin;
415   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
416   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
417   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
418   PetscFunctionReturn(0);
419 }
420 
421 #undef __FUNC__
422 #define __FUNC__ "MatMultTrans_MPIDense"
423 int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
424 {
425   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
426   int          ierr;
427   Scalar       zero = 0.0;
428 
429   PetscFunctionBegin;
430   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
431   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
432   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
433   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
434   PetscFunctionReturn(0);
435 }
436 
437 #undef __FUNC__
438 #define __FUNC__ "MatMultTransAdd_MPIDense"
439 int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
440 {
441   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
442   int          ierr;
443 
444   PetscFunctionBegin;
445   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
446   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
447   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
448   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
449   PetscFunctionReturn(0);
450 }
451 
452 #undef __FUNC__
453 #define __FUNC__ "MatGetDiagonal_MPIDense"
454 int MatGetDiagonal_MPIDense(Mat A,Vec v)
455 {
456   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
457   Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
458   int          ierr, len, i, n, m = a->m, radd;
459   Scalar       *x, zero = 0.0;
460 
461   PetscFunctionBegin;
462   ierr = VecSet(&zero,v);CHKERRQ(ierr);
463   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
464   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
465   if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
466   len = PetscMin(aloc->m,aloc->n);
467   radd = a->rstart*m;
468   for ( i=0; i<len; i++ ) {
469     x[i] = aloc->v[radd + i*m + i];
470   }
471   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
472   PetscFunctionReturn(0);
473 }
474 
475 #undef __FUNC__
476 #define __FUNC__ "MatDestroy_MPIDense"
477 int MatDestroy_MPIDense(Mat mat)
478 {
479   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
480   int          ierr;
481 
482   PetscFunctionBegin;
483 
484   if (mat->mapping) {
485     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
486   }
487   if (mat->bmapping) {
488     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
489   }
490 #if defined(PETSC_USE_LOG)
491   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
492 #endif
493   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
494   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
495   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
496   if (mdn->lvec)   VecDestroy(mdn->lvec);
497   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
498   if (mdn->factor) {
499     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
500     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
501     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
502     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
503   }
504   ierr = PetscFree(mdn);CHKERRQ(ierr);
505   if (mat->rmap) {
506     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
507   }
508   if (mat->cmap) {
509     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
510   }
511   PLogObjectDestroy(mat);
512   PetscHeaderDestroy(mat);
513   PetscFunctionReturn(0);
514 }
515 
516 #undef __FUNC__
517 #define __FUNC__ "MatView_MPIDense_Binary"
518 static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
519 {
520   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
521   int          ierr;
522 
523   PetscFunctionBegin;
524   if (mdn->size == 1) {
525     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
526   }
527   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
528   PetscFunctionReturn(0);
529 }
530 
531 #undef __FUNC__
532 #define __FUNC__ "MatView_MPIDense_ASCII"
533 static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
534 {
535   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
536   int          ierr, format, size = mdn->size, rank = mdn->rank;
537   FILE         *fd;
538   ViewerType   vtype;
539 
540   PetscFunctionBegin;
541   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
542   ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
543   ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
544   if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
545     MatInfo info;
546     ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
547     ierr = ViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
548          (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
549     ierr = ViewerFlush(viewer);CHKERRQ(ierr);
550     ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
551     PetscFunctionReturn(0);
552   } else if (format == VIEWER_FORMAT_ASCII_INFO) {
553     PetscFunctionReturn(0);
554   }
555 
556   if (size == 1) {
557     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
558   } else {
559     /* assemble the entire matrix onto first processor. */
560     Mat          A;
561     int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
562     Scalar       *vals;
563     Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;
564 
565     if (!rank) {
566       ierr = MatCreateMPIDense(mat->comm,M,mdn->nvec,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
567     } else {
568       ierr = MatCreateMPIDense(mat->comm,0,mdn->nvec,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
569     }
570     PLogObjectParent(mat,A);
571 
572     /* Copy the matrix ... This isn't the most efficient means,
573        but it's quick for now */
574     row = mdn->rstart; m = Amdn->m;
575     for ( i=0; i<m; i++ ) {
576       ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
577       ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
578       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
579       row++;
580     }
581 
582     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
583     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
584     if (!rank) {
585       Viewer sviewer;
586       ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
587       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
588       ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
589     }
590     ierr = ViewerFlush(viewer);CHKERRQ(ierr);
591     ierr = MatDestroy(A);CHKERRQ(ierr);
592   }
593   PetscFunctionReturn(0);
594 }
595 
596 #undef __FUNC__
597 #define __FUNC__ "MatView_MPIDense"
598 int MatView_MPIDense(Mat mat,Viewer viewer)
599 {
600   int        ierr;
601   PetscTruth isascii,isbinary;
602 
603   PetscFunctionBegin;
604 
605   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
606   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
607 
608   if (isascii) {
609     ierr = MatView_MPIDense_ASCII(mat,viewer);CHKERRQ(ierr);
610   } else if (isbinary) {
611     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
612   } else {
613     SETERRQ1(1,1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
614   }
615   PetscFunctionReturn(0);
616 }
617 
618 #undef __FUNC__
619 #define __FUNC__ "MatGetInfo_MPIDense"
620 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
621 {
622   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
623   Mat          mdn = mat->A;
624   int          ierr;
625   double       isend[5], irecv[5];
626 
627   PetscFunctionBegin;
628   info->rows_global    = (double)mat->M;
629   info->columns_global = (double)mat->N;
630   info->rows_local     = (double)mat->m;
631   info->columns_local  = (double)mat->N;
632   info->block_size     = 1.0;
633   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
634   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
635   isend[3] = info->memory;  isend[4] = info->mallocs;
636   if (flag == MAT_LOCAL) {
637     info->nz_used      = isend[0];
638     info->nz_allocated = isend[1];
639     info->nz_unneeded  = isend[2];
640     info->memory       = isend[3];
641     info->mallocs      = isend[4];
642   } else if (flag == MAT_GLOBAL_MAX) {
643     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
644     info->nz_used      = irecv[0];
645     info->nz_allocated = irecv[1];
646     info->nz_unneeded  = irecv[2];
647     info->memory       = irecv[3];
648     info->mallocs      = irecv[4];
649   } else if (flag == MAT_GLOBAL_SUM) {
650     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
651     info->nz_used      = irecv[0];
652     info->nz_allocated = irecv[1];
653     info->nz_unneeded  = irecv[2];
654     info->memory       = irecv[3];
655     info->mallocs      = irecv[4];
656   }
657   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
658   info->fill_ratio_needed = 0;
659   info->factor_mallocs    = 0;
660   PetscFunctionReturn(0);
661 }
662 
663 /* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
664    extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
665    extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
666    extern int MatSolve_MPIDense(Mat,Vec,Vec);
667    extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
668    extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
669    extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */
670 
671 #undef __FUNC__
672 #define __FUNC__ "MatSetOption_MPIDense"
673 int MatSetOption_MPIDense(Mat A,MatOption op)
674 {
675   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
676 
677   PetscFunctionBegin;
678   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
679       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
680       op == MAT_NEW_NONZERO_LOCATION_ERR ||
681       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
682       op == MAT_COLUMNS_SORTED ||
683       op == MAT_COLUMNS_UNSORTED) {
684         MatSetOption(a->A,op);
685   } else if (op == MAT_ROW_ORIENTED) {
686         a->roworiented = 1;
687         MatSetOption(a->A,op);
688   } else if (op == MAT_ROWS_SORTED ||
689              op == MAT_ROWS_UNSORTED ||
690              op == MAT_SYMMETRIC ||
691              op == MAT_STRUCTURALLY_SYMMETRIC ||
692              op == MAT_YES_NEW_DIAGONALS ||
693              op == MAT_USE_HASH_TABLE) {
694     PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
695   } else if (op == MAT_COLUMN_ORIENTED) {
696     a->roworiented = 0; MatSetOption(a->A,op);
697   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
698     a->donotstash = 1;
699   } else if (op == MAT_NO_NEW_DIAGONALS) {
700     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
701   } else {
702     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
703   }
704   PetscFunctionReturn(0);
705 }
706 
707 #undef __FUNC__
708 #define __FUNC__ "MatGetSize_MPIDense"
709 int MatGetSize_MPIDense(Mat A,int *m,int *n)
710 {
711   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
712 
713   PetscFunctionBegin;
714   *m = mat->M; *n = mat->N;
715   PetscFunctionReturn(0);
716 }
717 
718 #undef __FUNC__
719 #define __FUNC__ "MatGetLocalSize_MPIDense"
720 int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
721 {
722   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
723 
724   PetscFunctionBegin;
725   *m = mat->m; *n = mat->N;
726   PetscFunctionReturn(0);
727 }
728 
729 #undef __FUNC__
730 #define __FUNC__ "MatGetOwnershipRange_MPIDense"
731 int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
732 {
733   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
734 
735   PetscFunctionBegin;
736   *m = mat->rstart; *n = mat->rend;
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNC__
741 #define __FUNC__ "MatGetRow_MPIDense"
742 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
743 {
744   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
745   int          lrow, rstart = mat->rstart, rend = mat->rend,ierr;
746 
747   PetscFunctionBegin;
747   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows");
749   lrow = row - rstart;
750   ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
751   PetscFunctionReturn(0);
752 }
753 
754 #undef __FUNC__
755 #define __FUNC__ "MatRestoreRow_MPIDense"
756 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
757 {
758   int ierr;
759 
760   PetscFunctionBegin;
761   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
762   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
763   PetscFunctionReturn(0);
764 }
765 
766 #undef __FUNC__
767 #define __FUNC__ "MatDiagonalScale_MPIDense"
768 int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
769 {
770   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
771   Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
772   Scalar       *l,*r,x,*v;
773   int          ierr,i,j,s2a,s3a,s2,s3,m=mat->m,n=mat->n;
774 
775   PetscFunctionBegin;
776   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
777   if (ll) {
778     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
779     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Left scaling vector non-conforming local size");
780     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
781     for ( i=0; i<m; i++ ) {
782       x = l[i];
783       v = mat->v + i;
784       for ( j=0; j<n; j++ ) { (*v) *= x; v+= m;}
785     }
786     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
787     PLogFlops(n*m);
788   }
789   if (rr) {
790     ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
791     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Right scaling vec non-conforming local size");
792     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
793     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
794     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
795     for ( i=0; i<n; i++ ) {
796       x = r[i];
797       v = mat->v + i*m;
798       for ( j=0; j<m; j++ ) { (*v++) *= x;}
799     }
800     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
801     PLogFlops(n*m);
802   }
803   PetscFunctionReturn(0);
804 }
805 
806 #undef __FUNC__
807 #define __FUNC__ "MatNorm_MPIDense"
808 int MatNorm_MPIDense(Mat A,NormType type,double *norm)
809 {
810   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
811   Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
812   int          ierr, i, j;
813   double       sum = 0.0;
814   Scalar       *v = mat->v;
815 
816   PetscFunctionBegin;
817   if (mdn->size == 1) {
818     ierr =  MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
819   } else {
820     if (type == NORM_FROBENIUS) {
821       for (i=0; i<mat->n*mat->m; i++ ) {
822 #if defined(PETSC_USE_COMPLEX)
823         sum += PetscReal(PetscConj(*v)*(*v)); v++;
824 #else
825         sum += (*v)*(*v); v++;
826 #endif
827       }
828       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
829       *norm = sqrt(*norm);
830       PLogFlops(2*mat->n*mat->m);
831     } else if (type == NORM_1) {
832       double *tmp, *tmp2;
833       tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) );CHKPTRQ(tmp);
834       tmp2 = tmp + mdn->N;
835       ierr = PetscMemzero(tmp,2*mdn->N*sizeof(double));CHKERRQ(ierr);
836       *norm = 0.0;
837       v = mat->v;
838       for ( j=0; j<mat->n; j++ ) {
839         for ( i=0; i<mat->m; i++ ) {
840           tmp[j] += PetscAbsScalar(*v);  v++;
841         }
842       }
843       ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
844       for ( j=0; j<mdn->N; j++ ) {
845         if (tmp2[j] > *norm) *norm = tmp2[j];
846       }
847       ierr = PetscFree(tmp);CHKERRQ(ierr);
848       PLogFlops(mat->n*mat->m);
849     } else if (type == NORM_INFINITY) { /* max row norm */
850       double ntemp;
851       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
852       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
853     } else {
854       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
855     }
856   }
857   PetscFunctionReturn(0);
858 }
859 
860 #undef __FUNC__
861 #define __FUNC__ "MatTranspose_MPIDense"
862 int MatTranspose_MPIDense(Mat A,Mat *matout)
863 {
864   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
865   Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
866   Mat          B;
867   int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
868   int          j, i, ierr;
869   Scalar       *v;
870 
871   PetscFunctionBegin;
872   if (matout == PETSC_NULL && M != N) {
873     SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place");
874   }
875   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);
876 
877   m = Aloc->m; n = Aloc->n; v = Aloc->v;
878   rwork = (int *) PetscMalloc(m*sizeof(int));CHKPTRQ(rwork);
879   for ( j=0; j<n; j++ ) {
880     for (i=0; i<m; i++) rwork[i] = rstart + i;
881     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
882     v   += m;
883   }
884   ierr = PetscFree(rwork);CHKERRQ(ierr);
885   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
886   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
887   if (matout != PETSC_NULL) {
888     *matout = B;
889   } else {
890     PetscOps *Abops;
891     MatOps   Aops;
892 
893     /* This isn't really an in-place transpose; we simply free the data structures from a */
894     ierr = PetscFree(a->rowners);CHKERRQ(ierr);
895     ierr = MatDestroy(a->A);CHKERRQ(ierr);
896     if (a->lvec) VecDestroy(a->lvec);
897     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
898     ierr = PetscFree(a);CHKERRQ(ierr);
899 
900     /*
901          This is horrible, horrible code. We need to keep the
902       A pointers for the bops and ops but copy everything
903       else from B.
904     */
905     Abops   = A->bops;
906     Aops    = A->ops;
907     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
908     A->bops = Abops;
909     A->ops  = Aops;
910 
911     PetscHeaderDestroy(B);
912   }
913   PetscFunctionReturn(0);
914 }
915 
916 #include "pinclude/blaslapack.h"
917 #undef __FUNC__
918 #define __FUNC__ "MatScale_MPIDense"
919 int MatScale_MPIDense(Scalar *alpha,Mat inA)
920 {
921   Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
922   Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
923   int          one = 1, nz;
924 
925   PetscFunctionBegin;
926   nz = a->m*a->n;
927   BLscal_( &nz, alpha, a->v, &one );
928   PLogFlops(nz);
929   PetscFunctionReturn(0);
930 }
931 
932 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
933 extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);
934 
935 /* -------------------------------------------------------------------*/
936 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
937        MatGetRow_MPIDense,
938        MatRestoreRow_MPIDense,
939        MatMult_MPIDense,
940        MatMultAdd_MPIDense,
941        MatMultTrans_MPIDense,
942        MatMultTransAdd_MPIDense,
943        0,
944        0,
945        0,
946        0,
947        0,
948        0,
949        0,
950        MatTranspose_MPIDense,
951        MatGetInfo_MPIDense,0,
952        MatGetDiagonal_MPIDense,
953        MatDiagonalScale_MPIDense,
954        MatNorm_MPIDense,
955        MatAssemblyBegin_MPIDense,
956        MatAssemblyEnd_MPIDense,
957        0,
958        MatSetOption_MPIDense,
959        MatZeroEntries_MPIDense,
960        MatZeroRows_MPIDense,
961        0,
962        0,
963        0,
964        0,
965        MatGetSize_MPIDense,
966        MatGetLocalSize_MPIDense,
967        MatGetOwnershipRange_MPIDense,
968        0,
969        0,
970        MatGetArray_MPIDense,
971        MatRestoreArray_MPIDense,
972        MatDuplicate_MPIDense,
973        0,
974        0,
975        0,
976        0,
977        0,
978        MatGetSubMatrices_MPIDense,
979        0,
980        MatGetValues_MPIDense,
981        0,
982        0,
983        MatScale_MPIDense,
984        0,
985        0,
986        0,
987        MatGetBlockSize_MPIDense,
988        0,
989        0,
990        0,
991        0,
992        0,
993        0,
994        0,
995        0,
996        0,
997        MatGetSubMatrix_MPIDense,
998        0,
999        0,
1000        MatGetMaps_Petsc};
1001 
1002 #undef __FUNC__
1003 #define __FUNC__ "MatCreateMPIDense"
1004 /*@C
1005    MatCreateMPIDense - Creates a parallel matrix in dense format.
1006 
1007    Collective on MPI_Comm
1008 
1009    Input Parameters:
1010 +  comm - MPI communicator
1011 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1012 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1013 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1014 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1015 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1016    to control all matrix memory allocation.
1017 
1018    Output Parameter:
1019 .  A - the matrix
1020 
1021    Notes:
1022    The dense format is fully compatible with standard Fortran 77
1023    storage by columns.
1024 
1025    The data input variable is intended primarily for Fortran programmers
1026    who wish to allocate their own matrix memory space.  Most users should
1027    set data=PETSC_NULL.
1028 
1029    The user MUST specify either the local or global matrix dimensions
1030    (possibly both).
1031 
1032    Level: intermediate
1033 
1034 .keywords: matrix, dense, parallel
1035 
1036 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1037 @*/
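/*
   A minimal usage sketch (the communicator, dimensions, and values below are
   purely illustrative):

      Mat    A;
      Scalar v = 1.0;
      int    ierr, row = 0, col = 0;

      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,PETSC_NULL,&A);
      ierr = MatSetValues(A,1,&row,1,&col,&v,INSERT_VALUES);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
      ierr = MatDestroy(A);
*/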
1038 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
1039 {
1040   Mat          mat;
1041   Mat_MPIDense *a;
1042   int          ierr, i,flg;
1043 
1044   PetscFunctionBegin;
1045   /* Note:  For now, when data is specified above, this assumes the user correctly
1046    allocates the local dense storage space.  We should add error checking. */
1047 
1048   *A = 0;
1049   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView);
1050   PLogObjectCreate(mat);
1051   mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
1052   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1053   mat->ops->destroy = MatDestroy_MPIDense;
1054   mat->ops->view    = MatView_MPIDense;
1055   mat->factor       = 0;
1056   mat->mapping      = 0;
1057 
1058   a->factor       = 0;
1059   mat->insertmode = NOT_SET_VALUES;
1060   ierr = MPI_Comm_rank(comm,&a->rank);CHKERRQ(ierr);
1061   ierr = MPI_Comm_size(comm,&a->size);CHKERRQ(ierr);
1062 
1063   ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
1064 
1065   ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1066   a->nvec = n;
1067 
1068   /* each row stores all columns */
1069   a->N = mat->N = N;
1070   a->M = mat->M = M;
1071   a->m = mat->m = m;
1072   a->n = mat->n = N;   /* NOTE: n == N */
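  /* that is, each process stores a dense m x N block (its m local rows by all
     N global columns) in the sequential matrix a->A created below; the local
     column count n is retained only in a->nvec and the parallel column map */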
1073 
1074   /* the information in the maps duplicates the information computed below, eventually
1075      we should remove the duplicate information that is not contained in the maps */
1076   ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr);
1077   ierr = MapCreateMPI(comm,n,N,&mat->cmap);CHKERRQ(ierr);
1078 
1079   /* build local table of row and column ownerships */
1080   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
1081   a->cowners = a->rowners + a->size + 1;
1082   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1083   ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1084   a->rowners[0] = 0;
1085   for ( i=2; i<=a->size; i++ ) {
1086     a->rowners[i] += a->rowners[i-1];
1087   }
1088   a->rstart = a->rowners[a->rank];
1089   a->rend   = a->rowners[a->rank+1];
1090   ierr      = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1091   a->cowners[0] = 0;
1092   for ( i=2; i<=a->size; i++ ) {
1093     a->cowners[i] += a->cowners[i-1];
1094   }
1095 
1096   ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A);CHKERRQ(ierr);
1097   PLogObjectParent(mat,a->A);
1098 
1099   /* build cache for off array entries formed */
1100   a->donotstash = 0;
1101   ierr = MatStashCreate_Private(comm,1,&mat->stash);CHKERRQ(ierr);
1102 
1103   /* stuff used for matrix vector multiply */
1104   a->lvec        = 0;
1105   a->Mvctx       = 0;
1106   a->roworiented = 1;
1107 
1108   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",
1109                                      "MatGetDiagonalBlock_MPIDense",
1110                                      (void*)MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1111 
1112   *A = mat;
1113   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
1114   if (flg) {
1115     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
1116   }
1117   PetscFunctionReturn(0);
1118 }
1119 
1120 #undef __FUNC__
1121 #define __FUNC__ "MatDuplicate_MPIDense"
1122 static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1123 {
1124   Mat          mat;
1125   Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
1126   int          ierr;
1127   FactorCtx    *factor;
1128 
1129   PetscFunctionBegin;
1130   *newmat       = 0;
1131   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView);
1132   PLogObjectCreate(mat);
1133   mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
1134   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1135   mat->ops->destroy = MatDestroy_MPIDense;
1136   mat->ops->view    = MatView_MPIDense;
1137   mat->factor       = A->factor;
1138   mat->assembled    = PETSC_TRUE;
1139 
1140   a->m = mat->m = oldmat->m;
1141   a->n = mat->n = oldmat->n;
1142   a->M = mat->M = oldmat->M;
1143   a->N = mat->N = oldmat->N;
1144   if (oldmat->factor) {
1145     a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx));CHKPTRQ(factor);
1146     /* copy factor contents ... add this code! */
1147   } else a->factor = 0;
1148 
1149   a->rstart       = oldmat->rstart;
1150   a->rend         = oldmat->rend;
1151   a->size         = oldmat->size;
1152   a->rank         = oldmat->rank;
1153   mat->insertmode = NOT_SET_VALUES;
1154   a->donotstash   = oldmat->donotstash;
1155   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners);
1156   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1157   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
1158   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1159 
1160   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1161   PLogObjectParent(mat,a->lvec);
1162   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1163   PLogObjectParent(mat,a->Mvctx);
1164   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1165   PLogObjectParent(mat,a->A);
1166   *newmat = mat;
1167   PetscFunctionReturn(0);
1168 }
1169 
1170 #include "sys.h"
1171 
1172 #undef __FUNC__
1173 #define __FUNC__ "MatLoad_MPIDense_DenseInFile"
1174 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
1175 {
1176   int        *rowners, i,size,rank,m,ierr,nz,j;
1177   Scalar     *array,*vals,*vals_ptr;
1178   MPI_Status status;
1179 
1180   PetscFunctionBegin;
1181   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1182   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1183 
1184   /* determine ownership of all rows */
1185   m          = M/size + ((M % size) > rank);
1186   rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1187   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1188   rowners[0] = 0;
1189   for ( i=2; i<=size; i++ ) {
1190     rowners[i] += rowners[i-1];
1191   }
1192 
1193   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1194   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1195 
1196   if (!rank) {
1197     vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);
1198 
1199     /* read in my part of the matrix numerical values  */
1200     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1201 
1202     /* insert into matrix by row (this is why we cannot read directly into array) */
1203     vals_ptr = vals;
1204     for ( i=0; i<m; i++ ) {
1205       for ( j=0; j<N; j++ ) {
1206         array[i + j*m] = *vals_ptr++;
1207       }
1208     }
1209 
1210     /* read in other processors and ship out */
1211     for ( i=1; i<size; i++ ) {
1212       nz   = (rowners[i+1] - rowners[i])*N;
1213       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1214       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1215     }
1216   } else {
1217     /* receive numeric values */
1218     vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);
1219 
1220     /* receive message of values*/
1221     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1222 
1223     /* insert into matrix by row (this is why we cannot read directly into array) */
1224     vals_ptr = vals;
1225     for ( i=0; i<m; i++ ) {
1226       for ( j=0; j<N; j++ ) {
1227         array[i + j*m] = *vals_ptr++;
1228       }
1229     }
1230   }
1231   ierr = PetscFree(rowners);CHKERRQ(ierr);
1232   ierr = PetscFree(vals);CHKERRQ(ierr);
1233   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1234   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 
1239 #undef __FUNC__
1240 #define __FUNC__ "MatLoad_MPIDense"
1241 int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
1242 {
1243   Mat          A;
1244   Scalar       *vals,*svals;
1245   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1246   MPI_Status   status;
1247   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1248   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1249   int          tag = ((PetscObject)viewer)->tag;
1250   int          i, nz, ierr, j,rstart, rend, fd;
1251 
1252   PetscFunctionBegin;
1253   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1254   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1255   if (!rank) {
1256     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1257     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1258     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not a matrix object");
1259   }
1260 
1261   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1262   M = header[1]; N = header[2]; nz = header[3];
1263 
1264   /*
1265        Handle case where matrix is stored on disk as a dense matrix
1266   */
1267   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1268     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1269     PetscFunctionReturn(0);
1270   }
1271 
1272   /* determine ownership of all rows */
1273   m          = M/size + ((M % size) > rank);
1274   rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1275   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1276   rowners[0] = 0;
1277   for ( i=2; i<=size; i++ ) {
1278     rowners[i] += rowners[i-1];
1279   }
1280   rstart = rowners[rank];
1281   rend   = rowners[rank+1];
1282 
1283   /* distribute row lengths to all processors */
1284   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
1285   offlens = ourlens + (rend-rstart);
1286   if (!rank) {
1287     rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
1288     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1289     sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
1290     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1291     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1292     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1293   } else {
1294     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
1295   }
1296 
1297   if (!rank) {
1298     /* calculate the number of nonzeros on each processor */
1299     procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
1300     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1301     for ( i=0; i<size; i++ ) {
1302       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1303         procsnz[i] += rowlengths[j];
1304       }
1305     }
1306     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1307 
1308     /* determine max buffer needed and allocate it */
1309     maxnz = 0;
1310     for ( i=0; i<size; i++ ) {
1311       maxnz = PetscMax(maxnz,procsnz[i]);
1312     }
1313     cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
1314 
1315     /* read in my part of the matrix column indices  */
1316     nz = procsnz[0];
1317     mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
1318     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1319 
1320     /* read in everyone else's part and ship it off */
1321     for ( i=1; i<size; i++ ) {
1322       nz   = procsnz[i];
1323       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1324       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1325     }
1326     ierr = PetscFree(cols);CHKERRQ(ierr);
1327   } else {
1328     /* determine buffer space needed for message */
1329     nz = 0;
1330     for ( i=0; i<m; i++ ) {
1331       nz += ourlens[i];
1332     }
1333     mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
1334 
1335     /* receive message of column indices*/
1336     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1337     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1338     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1339   }
1340 
1341   /* loop over local rows, determining number of off diagonal entries */
1342   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1343   jj = 0;
1344   for ( i=0; i<m; i++ ) {
1345     for ( j=0; j<ourlens[i]; j++ ) {
1346       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1347       jj++;
1348     }
1349   }
1350 
1351   /* create our matrix */
1352   for ( i=0; i<m; i++ ) {
1353     ourlens[i] -= offlens[i];
1354   }
1355   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1356   A = *newmat;
1357   for ( i=0; i<m; i++ ) {
1358     ourlens[i] += offlens[i];
1359   }
1360 
1361   if (!rank) {
1362     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);
1363 
1364     /* read in my part of the matrix numerical values  */
1365     nz = procsnz[0];
1366     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1367 
1368     /* insert into matrix */
1369     jj      = rstart;
1370     smycols = mycols;
1371     svals   = vals;
1372     for ( i=0; i<m; i++ ) {
1373       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1374       smycols += ourlens[i];
1375       svals   += ourlens[i];
1376       jj++;
1377     }
1378 
1379     /* read in other processors and ship out */
1380     for ( i=1; i<size; i++ ) {
1381       nz   = procsnz[i];
1382       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1383       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1384     }
1385     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1386   } else {
1387     /* receive numeric values */
1388     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);
1389 
1390     /* receive message of values*/
1391     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1392     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1393     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1394 
1395     /* insert into matrix */
1396     jj      = rstart;
1397     smycols = mycols;
1398     svals   = vals;
1399     for ( i=0; i<m; i++ ) {
1400       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1401       smycols += ourlens[i];
1402       svals   += ourlens[i];
1403       jj++;
1404     }
1405   }
1406   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1407   ierr = PetscFree(vals);CHKERRQ(ierr);
1408   ierr = PetscFree(mycols);CHKERRQ(ierr);
1409   ierr = PetscFree(rowners);CHKERRQ(ierr);
1410 
1411   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1412   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 
1417 
1418 
1419 
1420