1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpidense.c,v 1.124 1999/09/02 14:53:19 bsmith Exp bsmith $";
3 #endif
4 
5 /*
6    Basic functions for parallel dense matrices.
7 */
8 
9 #include "src/mat/impls/dense/mpi/mpidense.h"
10 #include "src/vec/vecimpl.h"
11 
12 EXTERN_C_BEGIN
13 #undef __FUNC__
14 #define __FUNC__ "MatGetDiagonalBlock_MPIDense"
15 int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
16 {
17   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
18   int          m = mdn->m,rstart = mdn->rstart,rank,ierr;
19   Scalar       *array;
20   MPI_Comm     comm;
21 
22   PetscFunctionBegin;
23   if (mdn->M != mdn->N) SETERRQ(PETSC_ERR_SUP,0,"Only square matrices supported.");
24 
25   /* The reuse aspect is not implemented efficiently */
26   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
27 
28   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
29   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
30   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
31   ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
32   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
33   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
34   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
35 
36   *iscopy = PETSC_TRUE;
37   PetscFunctionReturn(0);
38 }
39 EXTERN_C_END
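
/*
   Note on the addressing above (explanatory comment, not in the original source):
   the local piece of an MPIDense matrix is an m x N array stored column by column
   (Fortran order).  For a square matrix the diagonal block is the m x m block whose
   first column is global column rstart, so it begins at offset m*rstart in the local
   array.  For example, with m = 2 local rows and rstart = 4, the block starts at
   array[8].
*/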
40 
41 extern int MatSetUpMultiply_MPIDense(Mat);
42 
43 #undef __FUNC__
44 #define __FUNC__ "MatSetValues_MPIDense"
45 int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
46 {
47   Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
48   int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
49   int          roworiented = A->roworiented;
50 
51   PetscFunctionBegin;
52   for ( i=0; i<m; i++ ) {
53     if (idxm[i] < 0) continue;
54     if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
55     if (idxm[i] >= rstart && idxm[i] < rend) {
56       row = idxm[i] - rstart;
57       if (roworiented) {
58         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
59       } else {
60         for ( j=0; j<n; j++ ) {
61           if (idxn[j] < 0) continue;
62           if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
63           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
64         }
65       }
66     } else {
67       if ( !A->donotstash) {
68         if (roworiented) {
69           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
70         } else {
71           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
72         }
73       }
74     }
75   }
76   PetscFunctionReturn(0);
77 }
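
/*
   Usage sketch from the caller's side (illustrative only, not part of the original
   source): any process may set any global entries; entries owned by another process
   are stashed here and communicated during assembly.

     int    rows[2] = {0,1}, cols[2] = {0,1};
     Scalar v[4]    = {1.0,2.0,3.0,4.0};   values in row-major order (default MAT_ROW_ORIENTED)

     ierr = MatSetValues(A,2,rows,2,cols,v,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/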
78 
79 #undef __FUNC__
80 #define __FUNC__ "MatGetValues_MPIDense"
81 int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
82 {
83   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
84   int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;
85 
86   PetscFunctionBegin;
87   for ( i=0; i<m; i++ ) {
88     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
89     if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
90     if (idxm[i] >= rstart && idxm[i] < rend) {
91       row = idxm[i] - rstart;
92       for ( j=0; j<n; j++ ) {
93         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
94         if (idxn[j] >= mdn->N) {
95           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
96         }
97         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
98       }
99     } else {
100       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
101     }
102   }
103   PetscFunctionReturn(0);
104 }
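
/*
   Since only locally owned rows can be retrieved, a caller would typically guard
   MatGetValues() with the ownership range (sketch, not part of the original source):

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     if (row >= rstart && row < rend) {
       ierr = MatGetValues(A,1,&row,1,&col,&value);CHKERRQ(ierr);
     }
*/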
105 
106 #undef __FUNC__
107 #define __FUNC__ "MatGetArray_MPIDense"
108 int MatGetArray_MPIDense(Mat A,Scalar **array)
109 {
110   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
111   int          ierr;
112 
113   PetscFunctionBegin;
114   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
115   PetscFunctionReturn(0);
116 }
117 
118 #undef __FUNC__
119 #define __FUNC__ "MatGetSubMatrix_MPIDense"
120 static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
121 {
122   Mat_MPIDense *mat = (Mat_MPIDense *) A->data, *newmatd;
123   Mat_SeqDense *lmat = (Mat_SeqDense *) mat->A->data;
124   int          i, j, ierr, *irow, *icol, rstart, rend, nrows, ncols, nlrows, nlcols, rank;
125   Scalar       *av, *bv, *v = lmat->v;
126   Mat          newmat;
127 
128   PetscFunctionBegin;
129   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
130   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
131   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
132   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
133   ierr = ISGetSize(iscol,&ncols);CHKERRQ(ierr);
134 
135   /* No parallel redistribution is currently supported! We should really check each
136      index set to confirm that it is OK. ... Currently supports only submatrices with
137      the same partitioning as the original matrix! */
138 
139   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
140   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
141 
142   /* Check submatrix call */
143   if (scall == MAT_REUSE_MATRIX) {
144     /* SETERRQ(PETSC_ERR_ARG_SIZ,0,"Reused submatrix wrong size"); */
145     /* Really need to test rows and column sizes! */
146     newmat = *B;
147   } else {
148     /* Create and fill new matrix */
149     ierr = MatCreateMPIDense(A->comm,nrows,ncols,PETSC_DECIDE,PETSC_DECIDE,PETSC_NULL,&newmat);CHKERRQ(ierr);
150   }
151 
152   /* Now extract the data pointers and do the copy, column at a time */
153   newmatd = (Mat_MPIDense *) newmat->data;
154   bv = ((Mat_SeqDense *)newmatd->A->data)->v;
155 
156   for ( i=0; i<ncols; i++ ) {
157     av = v + nlrows*icol[i];
158     for (j=0; j<nrows; j++ ) {
159       *bv++ = av[irow[j] - rstart];
160     }
161   }
162 
163   /* Assemble the matrices so that the correct flags are set */
164   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
165   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
166 
167   /* Free work space */
168   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
169   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
170   *B = newmat;
171   PetscFunctionReturn(0);
172 }
173 
174 #undef __FUNC__
175 #define __FUNC__ "MatRestoreArray_MPIDense"
176 int MatRestoreArray_MPIDense(Mat A,Scalar **array)
177 {
178   PetscFunctionBegin;
179   PetscFunctionReturn(0);
180 }
181 
182 #undef __FUNC__
183 #define __FUNC__ "MatAssemblyBegin_MPIDense"
184 int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
185 {
186   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
187   MPI_Comm     comm = mat->comm;
188   int          ierr,nstash,reallocs;
189   InsertMode   addv;
190 
191   PetscFunctionBegin;
192   /* make sure all processors are either in INSERTMODE or ADDMODE */
193   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
194   if (addv == (ADD_VALUES|INSERT_VALUES)) {
195     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
196   }
197   mat->insertmode = addv; /* in case this processor had no cache */
198 
199   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
200   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
201   PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",
202            nstash,reallocs);
203   PetscFunctionReturn(0);
204 }
205 
206 #undef __FUNC__
207 #define __FUNC__ "MatAssemblyEnd_MPIDense"
208 int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
209 {
210   Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
211   int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
212   Scalar       *val;
213   InsertMode   addv=mat->insertmode;
214 
215   PetscFunctionBegin;
216   /*  wait on receives */
217   while (1) {
218     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
219     if (!flg) break;
220 
221     for ( i=0; i<n; ) {
222       /* Now identify the consecutive vals belonging to the same row */
223       for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
224       if (j < n) ncols = j-i;
225       else       ncols = n-i;
226       /* Now assemble all these values with a single function call */
227       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
228       i = j;
229     }
230   }
231   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
232 
233   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
234   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
235 
236   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
237     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
238   }
239   PetscFunctionReturn(0);
240 }
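
/*
   Example of the grouping done above (explanatory comment, not in the original
   source): if a received stash message contains rows {3,3,3,7,7}, the inner loop
   finds the runs (row 3, ncols = 3) and (row 7, ncols = 2), so each run of stashed
   entries is inserted with a single MatSetValues_MPIDense() call.
*/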
241 
242 #undef __FUNC__
243 #define __FUNC__ "MatZeroEntries_MPIDense"
244 int MatZeroEntries_MPIDense(Mat A)
245 {
246   int          ierr;
247   Mat_MPIDense *l = (Mat_MPIDense *) A->data;
248 
249   PetscFunctionBegin;
250   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
251   PetscFunctionReturn(0);
252 }
253 
254 #undef __FUNC__
255 #define __FUNC__ "MatGetBlockSize_MPIDense"
256 int MatGetBlockSize_MPIDense(Mat A,int *bs)
257 {
258   PetscFunctionBegin;
259   *bs = 1;
260   PetscFunctionReturn(0);
261 }
262 
263 /* This code does not handle the diagonal entries correctly unless the
264    matrix is square and the column and row ownerships are identical.
265    This is a BUG. The only way to fix it seems to be to access
266    mdn->A and mdn->B directly and not through the MatZeroRows()
267    routine.
268 */
269 #undef __FUNC__
270 #define __FUNC__ "MatZeroRows_MPIDense"
271 int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
272 {
273   Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
274   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
275   int            *procs,*nprocs,j,found,idx,nsends,*work;
276   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
277   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
278   int            *lens,imdex,*lrows,*values;
279   MPI_Comm       comm = A->comm;
280   MPI_Request    *send_waits,*recv_waits;
281   MPI_Status     recv_status,*send_status;
282   IS             istmp;
283 
284   PetscFunctionBegin;
285   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
286   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
287 
288   /*  first count number of contributors to each processor */
289   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
290   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
291   procs  = nprocs + size;
292   owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
293   for ( i=0; i<N; i++ ) {
294     idx = rows[i];
295     found = 0;
296     for ( j=0; j<size; j++ ) {
297       if (idx >= owners[j] && idx < owners[j+1]) {
298         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
299       }
300     }
301     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
302   }
303   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
304 
305   /* inform other processors of number of messages and max length*/
306   work   = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
307   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
308   nrecvs = work[rank];
309   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
310   nmax   = work[rank];
311   ierr = PetscFree(work);CHKERRQ(ierr);
312 
313   /* post receives:   */
314   rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
315   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
316   for ( i=0; i<nrecvs; i++ ) {
317     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
318   }
319 
320   /* do sends:
321       1) starts[i] gives the starting index in svalues for stuff going to
322          the ith processor
323   */
324   svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
325   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
326   starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
327   starts[0]  = 0;
328   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
329   for ( i=0; i<N; i++ ) {
330     svalues[starts[owner[i]]++] = rows[i];
331   }
332   ISRestoreIndices(is,&rows);
333 
334   starts[0] = 0;
335   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
336   count = 0;
337   for ( i=0; i<size; i++ ) {
338     if (procs[i]) {
339       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
340     }
341   }
342   ierr = PetscFree(starts);CHKERRQ(ierr);
343 
344   base = owners[rank];
345 
346   /*  wait on receives */
347   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
348   source = lens + nrecvs;
349   count  = nrecvs; slen = 0;
350   while (count) {
351     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
352     /* unpack receives into our local space */
353     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
354     source[imdex]  = recv_status.MPI_SOURCE;
355     lens[imdex]  = n;
356     slen += n;
357     count--;
358   }
359   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
360 
361   /* move the received row indices into a local list, shifted to local numbering */
362   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
363   count = 0;
364   for ( i=0; i<nrecvs; i++ ) {
365     values = rvalues + i*nmax;
366     for ( j=0; j<lens[i]; j++ ) {
367       lrows[count++] = values[j] - base;
368     }
369   }
370   ierr = PetscFree(rvalues);CHKERRQ(ierr);
371   ierr = PetscFree(lens);CHKERRQ(ierr);
372   ierr = PetscFree(owner);CHKERRQ(ierr);
373   ierr = PetscFree(nprocs);CHKERRQ(ierr);
374 
375   /* actually zap the local rows */
376   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
377   PLogObjectParent(A,istmp);
378   ierr = PetscFree(lrows);CHKERRQ(ierr);
379   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
380   ierr = ISDestroy(istmp);CHKERRQ(ierr);
381 
382   /* wait on sends */
383   if (nsends) {
384     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
385     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
386     ierr = PetscFree(send_status);CHKERRQ(ierr);
387   }
388   ierr = PetscFree(send_waits);CHKERRQ(ierr);
389   ierr = PetscFree(svalues);CHKERRQ(ierr);
390 
391   PetscFunctionReturn(0);
392 }
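
/*
   Usage sketch (illustrative only, not part of the original source): zero a set of
   global rows and place 2.0 on their diagonals.

     IS     is;
     int    rows[2] = {0,5};
     Scalar diag    = 2.0;

     ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is);CHKERRQ(ierr);
     ierr = MatZeroRows(A,is,&diag);CHKERRQ(ierr);
     ierr = ISDestroy(is);CHKERRQ(ierr);
*/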
393 
394 #undef __FUNC__
395 #define __FUNC__ "MatMult_MPIDense"
396 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
397 {
398   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
399   int          ierr;
400 
401   PetscFunctionBegin;
402   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
403   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
404   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
405   PetscFunctionReturn(0);
406 }
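
/*
   The multiply above (and the variants below) all follow the same pattern
   (explanatory comment, not in the original source): the global input vector xx is
   first scattered into the local work vector lvec, which holds all N entries of x on
   every process, and the product is then a purely local dense multiply of the m x N
   local block against lvec.
*/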
407 
408 #undef __FUNC__
409 #define __FUNC__ "MatMultAdd_MPIDense"
410 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
411 {
412   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
413   int          ierr;
414 
415   PetscFunctionBegin;
416   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
417   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
418   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
419   PetscFunctionReturn(0);
420 }
421 
422 #undef __FUNC__
423 #define __FUNC__ "MatMultTrans_MPIDense"
424 int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
425 {
426   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
427   int          ierr;
428   Scalar       zero = 0.0;
429 
430   PetscFunctionBegin;
431   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
432   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
433   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
434   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
435   PetscFunctionReturn(0);
436 }
437 
438 #undef __FUNC__
439 #define __FUNC__ "MatMultTransAdd_MPIDense"
440 int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
441 {
442   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
443   int          ierr;
444 
445   PetscFunctionBegin;
446   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
447   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
448   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
449   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
450   PetscFunctionReturn(0);
451 }
452 
453 #undef __FUNC__
454 #define __FUNC__ "MatGetDiagonal_MPIDense"
455 int MatGetDiagonal_MPIDense(Mat A,Vec v)
456 {
457   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
458   Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
459   int          ierr, len, i, n, m = a->m, radd;
460   Scalar       *x, zero = 0.0;
461 
462   PetscFunctionBegin;
463   VecSet(&zero,v);
464   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
465   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
466   if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
467   len = PetscMin(aloc->m,aloc->n);
468   radd = a->rstart*m;
469   for ( i=0; i<len; i++ ) {
470     x[i] = aloc->v[radd + i*m + i];
471   }
472   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
473   PetscFunctionReturn(0);
474 }
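
/*
   Indexing note (explanatory comment, not in the original source): the local block
   is stored column by column, so global entry (rstart+i, rstart+i) lives at
   aloc->v[rstart*m + i*m + i].  For example, with m = 2 and rstart = 4, the first
   local diagonal entry (4,4) is at v[8] and the second (5,5) is at v[11].
*/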
475 
476 #undef __FUNC__
477 #define __FUNC__ "MatDestroy_MPIDense"
478 int MatDestroy_MPIDense(Mat mat)
479 {
480   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
481   int          ierr;
482 
483   PetscFunctionBegin;
484 
485   if (mat->mapping) {
486     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
487   }
488   if (mat->bmapping) {
489     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
490   }
491 #if defined(PETSC_USE_LOG)
492   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
493 #endif
494   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
495   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
496   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
497   if (mdn->lvec)   VecDestroy(mdn->lvec);
498   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
499   if (mdn->factor) {
500     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
501     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
502     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
503     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
504   }
505   ierr = PetscFree(mdn);CHKERRQ(ierr);
506   if (mat->rmap) {
507     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
508   }
509   if (mat->cmap) {
510     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
511   }
512   PLogObjectDestroy(mat);
513   PetscHeaderDestroy(mat);
514   PetscFunctionReturn(0);
515 }
516 
517 #undef __FUNC__
518 #define __FUNC__ "MatView_MPIDense_Binary"
519 static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
520 {
521   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
522   int          ierr;
523 
524   PetscFunctionBegin;
525   if (mdn->size == 1) {
526     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
527   }
528   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
529   PetscFunctionReturn(0);
530 }
531 
532 #undef __FUNC__
533 #define __FUNC__ "MatView_MPIDense_ASCII"
534 static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
535 {
536   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
537   int          ierr, format, size = mdn->size, rank = mdn->rank;
538   FILE         *fd;
539   ViewerType   vtype;
540 
541   PetscFunctionBegin;
542   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
543   ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
544   ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
545   if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
546     MatInfo info;
547     ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
548     PetscSequentialPhaseBegin(mat->comm,1);
549       fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
550          (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
551       fflush(fd);
552     PetscSequentialPhaseEnd(mat->comm,1);
553     ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
554     PetscFunctionReturn(0);
555   } else if (format == VIEWER_FORMAT_ASCII_INFO) {
556     PetscFunctionReturn(0);
557   }
558 
559   if (size == 1) {
560     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
561   } else {
562     /* assemble the entire matrix onto the first processor */
563     Mat          A;
564     int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
565     Scalar       *vals;
566     Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;
567 
568     if (!rank) {
569       ierr = MatCreateMPIDense(mat->comm,M,mdn->nvec,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
570     } else {
571       ierr = MatCreateMPIDense(mat->comm,0,mdn->nvec,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
572     }
573     PLogObjectParent(mat,A);
574 
575     /* Copy the matrix ... This isn't the most efficient means,
576        but it's quick for now */
577     row = mdn->rstart; m = Amdn->m;
578     for ( i=0; i<m; i++ ) {
579       ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
580       ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
581       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
582       row++;
583     }
584 
585     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
586     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
587     if (!rank) {
588       ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer);CHKERRQ(ierr);
589     }
590     ierr = MatDestroy(A);CHKERRQ(ierr);
591   }
592   PetscFunctionReturn(0);
593 }
594 
595 #undef __FUNC__
596 #define __FUNC__ "MatView_MPIDense"
597 int MatView_MPIDense(Mat mat,Viewer viewer)
598 {
599   int          ierr;
600   ViewerType   vtype;
601 
602   PetscFunctionBegin;
603   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
604   if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
605     ierr = MatView_MPIDense_ASCII(mat,viewer);CHKERRQ(ierr);
606   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
607     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
608   } else {
609     SETERRQ(1,1,"Viewer type not supported by PETSc object");
610   }
611   PetscFunctionReturn(0);
612 }
613 
614 #undef __FUNC__
615 #define __FUNC__ "MatGetInfo_MPIDense"
616 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
617 {
618   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
619   Mat          mdn = mat->A;
620   int          ierr;
621   double       isend[5], irecv[5];
622 
623   PetscFunctionBegin;
624   info->rows_global    = (double)mat->M;
625   info->columns_global = (double)mat->N;
626   info->rows_local     = (double)mat->m;
627   info->columns_local  = (double)mat->N;
628   info->block_size     = 1.0;
629   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
630   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
631   isend[3] = info->memory;  isend[4] = info->mallocs;
632   if (flag == MAT_LOCAL) {
633     info->nz_used      = isend[0];
634     info->nz_allocated = isend[1];
635     info->nz_unneeded  = isend[2];
636     info->memory       = isend[3];
637     info->mallocs      = isend[4];
638   } else if (flag == MAT_GLOBAL_MAX) {
639     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
640     info->nz_used      = irecv[0];
641     info->nz_allocated = irecv[1];
642     info->nz_unneeded  = irecv[2];
643     info->memory       = irecv[3];
644     info->mallocs      = irecv[4];
645   } else if (flag == MAT_GLOBAL_SUM) {
646     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
647     info->nz_used      = irecv[0];
648     info->nz_allocated = irecv[1];
649     info->nz_unneeded  = irecv[2];
650     info->memory       = irecv[3];
651     info->mallocs      = irecv[4];
652   }
653   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
654   info->fill_ratio_needed = 0;
655   info->factor_mallocs    = 0;
656   PetscFunctionReturn(0);
657 }
658 
659 /* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
660    extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
661    extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
662    extern int MatSolve_MPIDense(Mat,Vec,Vec);
663    extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
664    extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
665    extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */
666 
667 #undef __FUNC__
668 #define __FUNC__ "MatSetOption_MPIDense"
669 int MatSetOption_MPIDense(Mat A,MatOption op)
670 {
671   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
672 
673   PetscFunctionBegin;
674   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
675       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
676       op == MAT_NEW_NONZERO_LOCATION_ERR ||
677       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
678       op == MAT_COLUMNS_SORTED ||
679       op == MAT_COLUMNS_UNSORTED) {
680         MatSetOption(a->A,op);
681   } else if (op == MAT_ROW_ORIENTED) {
682         a->roworiented = 1;
683         MatSetOption(a->A,op);
684   } else if (op == MAT_ROWS_SORTED ||
685              op == MAT_ROWS_UNSORTED ||
686              op == MAT_SYMMETRIC ||
687              op == MAT_STRUCTURALLY_SYMMETRIC ||
688              op == MAT_YES_NEW_DIAGONALS ||
689              op == MAT_USE_HASH_TABLE) {
690     PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
691   } else if (op == MAT_COLUMN_ORIENTED) {
692     a->roworiented = 0; MatSetOption(a->A,op);
693   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
694     a->donotstash = 1;
695   } else if (op == MAT_NO_NEW_DIAGONALS) {
696     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
697   } else {
698     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
699   }
700   PetscFunctionReturn(0);
701 }
702 
703 #undef __FUNC__
704 #define __FUNC__ "MatGetSize_MPIDense"
705 int MatGetSize_MPIDense(Mat A,int *m,int *n)
706 {
707   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
708 
709   PetscFunctionBegin;
710   *m = mat->M; *n = mat->N;
711   PetscFunctionReturn(0);
712 }
713 
714 #undef __FUNC__
715 #define __FUNC__ "MatGetLocalSize_MPIDense"
716 int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
717 {
718   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
719 
720   PetscFunctionBegin;
721   *m = mat->m; *n = mat->N;
722   PetscFunctionReturn(0);
723 }
724 
725 #undef __FUNC__
726 #define __FUNC__ "MatGetOwnershipRange_MPIDense"
727 int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
728 {
729   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
730 
731   PetscFunctionBegin;
732   *m = mat->rstart; *n = mat->rend;
733   PetscFunctionReturn(0);
734 }
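
/*
   Typical owner-computes loop built on the ownership range (sketch, not part of the
   original source):

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for ( row=rstart; row<rend; row++ ) {
       ... set or read entries of global row "row" ...
     }
*/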
735 
736 #undef __FUNC__
737 #define __FUNC__ "MatGetRow_MPIDense"
738 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
739 {
740   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
741   int          lrow, rstart = mat->rstart, rend = mat->rend,ierr;
742 
743   PetscFunctionBegin;
744   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows");
745   lrow = row - rstart;
746   ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
747   PetscFunctionReturn(0);
748 }
749 
750 #undef __FUNC__
751 #define __FUNC__ "MatRestoreRow_MPIDense"
752 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
753 {
754   int ierr;
755 
756   PetscFunctionBegin;
757   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
758   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
759   PetscFunctionReturn(0);
760 }
761 
762 #undef __FUNC__
763 #define __FUNC__ "MatDiagonalScale_MPIDense"
764 int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
765 {
766   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
767   Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
768   Scalar       *l,*r,x,*v;
769   int          ierr,i,j,s2a,s3a,s2,s3,m=mat->m,n=mat->n;
770 
771   PetscFunctionBegin;
772   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
773   if (ll) {
774     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
775     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Left scaling vector non-conforming local size");
776     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
777     for ( i=0; i<m; i++ ) {
778       x = l[i];
779       v = mat->v + i;
780       for ( j=0; j<n; j++ ) { (*v) *= x; v+= m;}
781     }
782     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
783     PLogFlops(n*m);
784   }
785   if (rr) {
786     ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
787     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Right scaling vec non-conforming local size");
788     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
789     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
790     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
791     for ( i=0; i<n; i++ ) {
792       x = r[i];
793       v = mat->v + i*m;
794       for ( j=0; j<m; j++ ) { (*v++) *= x;}
795     }
796     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
797     PLogFlops(n*m);
798   }
799   PetscFunctionReturn(0);
800 }
801 
802 #undef __FUNC__
803 #define __FUNC__ "MatNorm_MPIDense"
804 int MatNorm_MPIDense(Mat A,NormType type,double *norm)
805 {
806   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
807   Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
808   int          ierr, i, j;
809   double       sum = 0.0;
810   Scalar       *v = mat->v;
811 
812   PetscFunctionBegin;
813   if (mdn->size == 1) {
814     ierr =  MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
815   } else {
816     if (type == NORM_FROBENIUS) {
817       for (i=0; i<mat->n*mat->m; i++ ) {
818 #if defined(PETSC_USE_COMPLEX)
819         sum += PetscReal(PetscConj(*v)*(*v)); v++;
820 #else
821         sum += (*v)*(*v); v++;
822 #endif
823       }
824       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
825       *norm = sqrt(*norm);
826       PLogFlops(2*mat->n*mat->m);
827     } else if (type == NORM_1) {
828       double *tmp, *tmp2;
829       tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) );CHKPTRQ(tmp);
830       tmp2 = tmp + mdn->N;
831       ierr = PetscMemzero(tmp,2*mdn->N*sizeof(double));CHKERRQ(ierr);
832       *norm = 0.0;
833       v = mat->v;
834       for ( j=0; j<mat->n; j++ ) {
835         for ( i=0; i<mat->m; i++ ) {
836           tmp[j] += PetscAbsScalar(*v);  v++;
837         }
838       }
839       ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
840       for ( j=0; j<mdn->N; j++ ) {
841         if (tmp2[j] > *norm) *norm = tmp2[j];
842       }
843       ierr = PetscFree(tmp);CHKERRQ(ierr);
844       PLogFlops(mat->n*mat->m);
845     } else if (type == NORM_INFINITY) { /* max row norm */
846       double ntemp;
847       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
848       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
849     } else {
850       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
851     }
852   }
853   PetscFunctionReturn(0);
854 }
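
/*
   Norms computed above (explanatory comment, not in the original source):
     NORM_FROBENIUS: sqrt of the global sum of |a_ij|^2, formed from local sums
                     combined with MPI_Allreduce(MPI_SUM);
     NORM_1:         maximum column sum of |a_ij|; local column sums are summed
                     across processes, then the maximum is taken;
     NORM_INFINITY:  maximum row sum of |a_ij|; each process takes the norm of its
                     local rows and the maximum is reduced with MPI_MAX.
*/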
855 
856 #undef __FUNC__
857 #define __FUNC__ "MatTranspose_MPIDense"
858 int MatTranspose_MPIDense(Mat A,Mat *matout)
859 {
860   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
861   Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
862   Mat          B;
863   int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
864   int          j, i, ierr;
865   Scalar       *v;
866 
867   PetscFunctionBegin;
868   if (matout == PETSC_NULL && M != N) {
869     SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place");
870   }
871   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);
872 
873   m = Aloc->m; n = Aloc->n; v = Aloc->v;
874   rwork = (int *) PetscMalloc(m*sizeof(int));CHKPTRQ(rwork);
875   for ( j=0; j<n; j++ ) {
876     for (i=0; i<m; i++) rwork[i] = rstart + i;
877     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
878     v   += m;
879   }
880   ierr = PetscFree(rwork);CHKERRQ(ierr);
881   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
882   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
883   if (matout != PETSC_NULL) {
884     *matout = B;
885   } else {
886     PetscOps *Abops;
887     MatOps   Aops;
888 
889     /* This isn't really an in-place transpose; instead we free the data structures from a */
890     ierr = PetscFree(a->rowners);CHKERRQ(ierr);
891     ierr = MatDestroy(a->A);CHKERRQ(ierr);
892     if (a->lvec) VecDestroy(a->lvec);
893     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
894     ierr = PetscFree(a);CHKERRQ(ierr);
895 
896     /*
897          This is horrible, horrible code. We need to keep the
898       A pointers for the bops and ops but copy everything
899       else from C.
900     */
901     Abops   = A->bops;
902     Aops    = A->ops;
903     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
904     A->bops = Abops;
905     A->ops  = Aops;
906 
907     PetscHeaderDestroy(B);
908   }
909   PetscFunctionReturn(0);
910 }
911 
912 #include "pinclude/blaslapack.h"
913 #undef __FUNC__
914 #define __FUNC__ "MatScale_MPIDense"
915 int MatScale_MPIDense(Scalar *alpha,Mat inA)
916 {
917   Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
918   Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
919   int          one = 1, nz;
920 
921   PetscFunctionBegin;
922   nz = a->m*a->n;
923   BLscal_( &nz, alpha, a->v, &one );
924   PLogFlops(nz);
925   PetscFunctionReturn(0);
926 }
927 
928 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
929 extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);
930 
931 /* -------------------------------------------------------------------*/
932 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
933        MatGetRow_MPIDense,
934        MatRestoreRow_MPIDense,
935        MatMult_MPIDense,
936        MatMultAdd_MPIDense,
937        MatMultTrans_MPIDense,
938        MatMultTransAdd_MPIDense,
939        0,
940        0,
941        0,
942        0,
943        0,
944        0,
945        0,
946        MatTranspose_MPIDense,
947        MatGetInfo_MPIDense,0,
948        MatGetDiagonal_MPIDense,
949        MatDiagonalScale_MPIDense,
950        MatNorm_MPIDense,
951        MatAssemblyBegin_MPIDense,
952        MatAssemblyEnd_MPIDense,
953        0,
954        MatSetOption_MPIDense,
955        MatZeroEntries_MPIDense,
956        MatZeroRows_MPIDense,
957        0,
958        0,
959        0,
960        0,
961        MatGetSize_MPIDense,
962        MatGetLocalSize_MPIDense,
963        MatGetOwnershipRange_MPIDense,
964        0,
965        0,
966        MatGetArray_MPIDense,
967        MatRestoreArray_MPIDense,
968        MatDuplicate_MPIDense,
969        0,
970        0,
971        0,
972        0,
973        0,
974        MatGetSubMatrices_MPIDense,
975        0,
976        MatGetValues_MPIDense,
977        0,
978        0,
979        MatScale_MPIDense,
980        0,
981        0,
982        0,
983        MatGetBlockSize_MPIDense,
984        0,
985        0,
986        0,
987        0,
988        0,
989        0,
990        0,
991        0,
992        0,
993        MatGetSubMatrix_MPIDense,
994        0,
995        0,
996        MatGetMaps_Petsc};
997 
998 #undef __FUNC__
999 #define __FUNC__ "MatCreateMPIDense"
1000 /*@C
1001    MatCreateMPIDense - Creates a parallel matrix in dense format.
1002 
1003    Collective on MPI_Comm
1004 
1005    Input Parameters:
1006 +  comm - MPI communicator
1007 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1008 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1009 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1010 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1011 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1012    to control all matrix memory allocation.
1013 
1014    Output Parameter:
1015 .  A - the matrix
1016 
1017    Notes:
1018    The dense format is fully compatible with standard Fortran 77
1019    storage by columns.
1020 
1021    The data input variable is intended primarily for Fortran programmers
1022    who wish to allocate their own matrix memory space.  Most users should
1023    set data=PETSC_NULL.
1024 
1025    The user MUST specify either the local or global matrix dimensions
1026    (possibly both).
1027 
1028    Level: intermediate
1029 
1030 .keywords: matrix, dense, parallel
1031 
1032 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1033 @*/
1034 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
1035 {
1036   Mat          mat;
1037   Mat_MPIDense *a;
1038   int          ierr, i,flg;
1039 
1040   PetscFunctionBegin;
1041   /* Note:  For now, when data is specified above, this assumes the user correctly
1042    allocates the local dense storage space.  We should add error checking. */
1043 
1044   *A = 0;
1045   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView);
1046   PLogObjectCreate(mat);
1047   mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
1048   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1049   mat->ops->destroy = MatDestroy_MPIDense;
1050   mat->ops->view    = MatView_MPIDense;
1051   mat->factor       = 0;
1052   mat->mapping      = 0;
1053 
1054   a->factor       = 0;
1055   mat->insertmode = NOT_SET_VALUES;
1056   ierr = MPI_Comm_rank(comm,&a->rank);CHKERRQ(ierr);
1057   ierr = MPI_Comm_size(comm,&a->size);CHKERRQ(ierr);
1058 
1059   ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
1060 
1061   ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1062   a->nvec = n;
1063 
1064   /* each row stores all columns */
1065   a->N = mat->N = N;
1066   a->M = mat->M = M;
1067   a->m = mat->m = m;
1068   a->n = mat->n = N;   /* NOTE: n == N */
1069 
1070   /* the information in the maps duplicates the information computed below, eventually
1071      we should remove the duplicate information that is not contained in the maps */
1072   ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr);
1073   ierr = MapCreateMPI(comm,n,N,&mat->cmap);CHKERRQ(ierr);
1074 
1075   /* build local table of row and column ownerships */
1076   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
1077   a->cowners = a->rowners + a->size + 1;
1078   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1079   ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1080   a->rowners[0] = 0;
1081   for ( i=2; i<=a->size; i++ ) {
1082     a->rowners[i] += a->rowners[i-1];
1083   }
1084   a->rstart = a->rowners[a->rank];
1085   a->rend   = a->rowners[a->rank+1];
1086   ierr      = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1087   a->cowners[0] = 0;
1088   for ( i=2; i<=a->size; i++ ) {
1089     a->cowners[i] += a->cowners[i-1];
1090   }
1091 
1092   ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A);CHKERRQ(ierr);
1093   PLogObjectParent(mat,a->A);
1094 
1095   /* build cache for off array entries formed */
1096   a->donotstash = 0;
1097   ierr = MatStashCreate_Private(comm,1,&mat->stash);CHKERRQ(ierr);
1098 
1099   /* stuff used for matrix vector multiply */
1100   a->lvec        = 0;
1101   a->Mvctx       = 0;
1102   a->roworiented = 1;
1103 
1104   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",
1105                                      "MatGetDiagonalBlock_MPIDense",
1106                                      (void*)MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1107 
1108   *A = mat;
1109   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
1110   if (flg) {
1111     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
1112   }
1113   PetscFunctionReturn(0);
1114 }
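
/*
   Typical calling sequence (illustrative sketch, not part of the original source);
   here every process fills its own rows of an M x N matrix:

     Mat    A;
     int    i,j,rstart,rend,ierr;
     Scalar value;

     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for ( i=rstart; i<rend; i++ ) {
       for ( j=0; j<N; j++ ) {
         value = ...;
         ierr = MatSetValues(A,1,&i,1,&j,&value,INSERT_VALUES);CHKERRQ(ierr);
       }
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/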
1115 
1116 #undef __FUNC__
1117 #define __FUNC__ "MatDuplicate_MPIDense"
1118 static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1119 {
1120   Mat          mat;
1121   Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
1122   int          ierr;
1123   FactorCtx    *factor;
1124 
1125   PetscFunctionBegin;
1126   *newmat       = 0;
1127   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView);
1128   PLogObjectCreate(mat);
1129   mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
1130   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1131   mat->ops->destroy = MatDestroy_MPIDense;
1132   mat->ops->view    = MatView_MPIDense;
1133   mat->factor       = A->factor;
1134   mat->assembled    = PETSC_TRUE;
1135 
1136   a->m = mat->m = oldmat->m;
1137   a->n = mat->n = oldmat->n;
1138   a->M = mat->M = oldmat->M;
1139   a->N = mat->N = oldmat->N;
1140   if (oldmat->factor) {
1141     a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx));CHKPTRQ(factor);
1142     /* copy factor contents ... add this code! */
1143   } else a->factor = 0;
1144 
1145   a->rstart       = oldmat->rstart;
1146   a->rend         = oldmat->rend;
1147   a->size         = oldmat->size;
1148   a->rank         = oldmat->rank;
1149   mat->insertmode = NOT_SET_VALUES;
1150   a->donotstash   = oldmat->donotstash;
1151   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners);
1152   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1153   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
1154   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1155 
1156   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1157   PLogObjectParent(mat,a->lvec);
1158   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1159   PLogObjectParent(mat,a->Mvctx);
1160   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1161   PLogObjectParent(mat,a->A);
1162   *newmat = mat;
1163   PetscFunctionReturn(0);
1164 }
1165 
1166 #include "sys.h"
1167 
1168 #undef __FUNC__
1169 #define __FUNC__ "MatLoad_MPIDense_DenseInFile"
1170 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
1171 {
1172   int        *rowners, i,size,rank,m,ierr,nz,j;
1173   Scalar     *array,*vals,*vals_ptr;
1174   MPI_Status status;
1175 
1176   PetscFunctionBegin;
1177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1179 
1180   /* determine ownership of all rows */
1181   m          = M/size + ((M % size) > rank);
1182   rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1183   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1184   rowners[0] = 0;
1185   for ( i=2; i<=size; i++ ) {
1186     rowners[i] += rowners[i-1];
1187   }
1188 
1189   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1190   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1191 
1192   if (!rank) {
1193     vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);
1194 
1195     /* read in my part of the matrix numerical values  */
1196     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1197 
1198     /* insert into the matrix by row (this is why we cannot read directly into the array) */
1199     vals_ptr = vals;
1200     for ( i=0; i<m; i++ ) {
1201       for ( j=0; j<N; j++ ) {
1202         array[i + j*m] = *vals_ptr++;
1203       }
1204     }
1205 
1206     /* read in the other processors' parts and ship them out */
1207     for ( i=1; i<size; i++ ) {
1208       nz   = (rowners[i+1] - rowners[i])*N;
1209       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1210       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1211     }
1212   } else {
1213     /* receive numeric values */
1214     vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);
1215 
1216     /* receive message of values*/
1217     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1218 
1219     /* insert into the matrix by row (this is why we cannot read directly into the array) */
1220     vals_ptr = vals;
1221     for ( i=0; i<m; i++ ) {
1222       for ( j=0; j<N; j++ ) {
1223         array[i + j*m] = *vals_ptr++;
1224       }
1225     }
1226   }
1227   ierr = PetscFree(rowners);CHKERRQ(ierr);
1228   ierr = PetscFree(vals);CHKERRQ(ierr);
1229   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1230   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1231   PetscFunctionReturn(0);
1232 }
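
/*
   Layout note (explanatory comment, not in the original source): in the dense binary
   format the values are stored on disk row by row, while the in-memory local array is
   column major, hence the copy above places disk entry (i,j) at array[i + j*m].  For
   m = 2 local rows, row 0 of the file fills array[0],array[2],array[4],... and row 1
   fills array[1],array[3],...
*/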
1233 
1234 
1235 #undef __FUNC__
1236 #define __FUNC__ "MatLoad_MPIDense"
1237 int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
1238 {
1239   Mat          A;
1240   Scalar       *vals,*svals;
1241   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1242   MPI_Status   status;
1243   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1244   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1245   int          tag = ((PetscObject)viewer)->tag;
1246   int          i, nz, ierr, j,rstart, rend, fd;
1247 
1248   PetscFunctionBegin;
1249   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1250   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1251   if (!rank) {
1252     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1253     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1254     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1255   }
1256 
1257   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1258   M = header[1]; N = header[2]; nz = header[3];
1259 
1260   /*
1261        Handle case where matrix is stored on disk as a dense matrix
1262   */
1263   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1264     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1265     PetscFunctionReturn(0);
1266   }
1267 
1268   /* determine ownership of all rows */
1269   m          = M/size + ((M % size) > rank);
1270   rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1271   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1272   rowners[0] = 0;
1273   for ( i=2; i<=size; i++ ) {
1274     rowners[i] += rowners[i-1];
1275   }
1276   rstart = rowners[rank];
1277   rend   = rowners[rank+1];
1278 
1279   /* distribute row lengths to all processors */
1280   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
1281   offlens = ourlens + (rend-rstart);
1282   if (!rank) {
1283     rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
1284     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1285     sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
1286     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1287     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1288     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1289   } else {
1290     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
1291   }
1292 
1293   if (!rank) {
1294     /* calculate the number of nonzeros on each processor */
1295     procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
1296     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1297     for ( i=0; i<size; i++ ) {
1298       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1299         procsnz[i] += rowlengths[j];
1300       }
1301     }
1302     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1303 
1304     /* determine max buffer needed and allocate it */
1305     maxnz = 0;
1306     for ( i=0; i<size; i++ ) {
1307       maxnz = PetscMax(maxnz,procsnz[i]);
1308     }
1309     cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
1310 
1311     /* read in my part of the matrix column indices  */
1312     nz = procsnz[0];
1313     mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
1314     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1315 
1316     /* read in everyone else's part and ship it off */
1317     for ( i=1; i<size; i++ ) {
1318       nz   = procsnz[i];
1319       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1320       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1321     }
1322     ierr = PetscFree(cols);CHKERRQ(ierr);
1323   } else {
1324     /* determine buffer space needed for message */
1325     nz = 0;
1326     for ( i=0; i<m; i++ ) {
1327       nz += ourlens[i];
1328     }
1329     mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
1330 
1331     /* receive message of column indices*/
1332     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1333     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1334     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1335   }
1336 
1337   /* loop over local rows, determining number of off diagonal entries */
1338   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1339   jj = 0;
1340   for ( i=0; i<m; i++ ) {
1341     for ( j=0; j<ourlens[i]; j++ ) {
1342       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1343       jj++;
1344     }
1345   }
1346 
1347   /* create our matrix */
1348   for ( i=0; i<m; i++ ) {
1349     ourlens[i] -= offlens[i];
1350   }
1351   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1352   A = *newmat;
1353   for ( i=0; i<m; i++ ) {
1354     ourlens[i] += offlens[i];
1355   }
1356 
1357   if (!rank) {
1358     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);
1359 
1360     /* read in my part of the matrix numerical values  */
1361     nz = procsnz[0];
1362     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1363 
1364     /* insert into matrix */
1365     jj      = rstart;
1366     smycols = mycols;
1367     svals   = vals;
1368     for ( i=0; i<m; i++ ) {
1369       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1370       smycols += ourlens[i];
1371       svals   += ourlens[i];
1372       jj++;
1373     }
1374 
1375     /* read in the other processors' parts and ship them out */
1376     for ( i=1; i<size; i++ ) {
1377       nz   = procsnz[i];
1378       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1379       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1380     }
1381     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1382   } else {
1383     /* receive numeric values */
1384     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);
1385 
1386     /* receive message of values*/
1387     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1388     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1389     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1390 
1391     /* insert into matrix */
1392     jj      = rstart;
1393     smycols = mycols;
1394     svals   = vals;
1395     for ( i=0; i<m; i++ ) {
1396       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1397       smycols += ourlens[i];
1398       svals   += ourlens[i];
1399       jj++;
1400     }
1401   }
1402   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1403   ierr = PetscFree(vals);CHKERRQ(ierr);
1404   ierr = PetscFree(mycols);CHKERRQ(ierr);
1405   ierr = PetscFree(rowners);CHKERRQ(ierr);
1406 
1407   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1408   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1409   PetscFunctionReturn(0);
1410 }
1411 
1412 
1413 
1414 
1415 
1416