1 /*$Id: mpidense.c,v 1.128 1999/10/13 20:37:17 bsmith Exp bsmith $*/
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 #include "src/mat/impls/dense/mpi/mpidense.h"
8 #include "src/vec/vecimpl.h"
9 
10 EXTERN_C_BEGIN
11 #undef __FUNC__
12 #define __FUNC__ "MatGetDiagonalBlock_MPIDense"
13 int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
14 {
15   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
16   int          m = mdn->m,rstart = mdn->rstart,rank,ierr;
17   Scalar       *array;
18   MPI_Comm     comm;
19 
20   PetscFunctionBegin;
21   if (mdn->M != mdn->N) SETERRQ(PETSC_ERR_SUP,0,"Only square matrices supported.");
22 
23   /* The reuse aspect is not implemented efficiently */
24   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
25 
26   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
27   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
28   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
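  /* the local values are stored by columns; column rstart begins at array+m*rstart,
     so the m x m diagonal block can be wrapped directly in a sequential dense matrix
     without copying the values */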
29   ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
30   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
31   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
32   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
33 
34   *iscopy = PETSC_TRUE;
35   PetscFunctionReturn(0);
36 }
37 EXTERN_C_END
38 
39 extern int MatSetUpMultiply_MPIDense(Mat);
40 
41 #undef __FUNC__
42 #define __FUNC__ "MatSetValues_MPIDense"
43 int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
44 {
45   Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
46   int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
47   int          roworiented = A->roworiented;
48 
49   PetscFunctionBegin;
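  /* rows owned by this processor go directly into the local sequential dense matrix;
     off-processor rows are stashed (unless MAT_IGNORE_OFF_PROC_ENTRIES was set) and
     are exchanged during MatAssemblyBegin()/MatAssemblyEnd() */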
50   for ( i=0; i<m; i++ ) {
51     if (idxm[i] < 0) continue;
52     if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
53     if (idxm[i] >= rstart && idxm[i] < rend) {
54       row = idxm[i] - rstart;
55       if (roworiented) {
56         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
57       } else {
58         for ( j=0; j<n; j++ ) {
59           if (idxn[j] < 0) continue;
60           if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
61           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
62         }
63       }
64     } else {
65       if ( !A->donotstash) {
66         if (roworiented) {
67           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
68         } else {
69           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
70         }
71       }
72     }
73   }
74   PetscFunctionReturn(0);
75 }
76 
77 #undef __FUNC__
78 #define __FUNC__ "MatGetValues_MPIDense"
79 int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
80 {
81   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
82   int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;
83 
84   PetscFunctionBegin;
85   for ( i=0; i<m; i++ ) {
86     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
87     if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
88     if (idxm[i] >= rstart && idxm[i] < rend) {
89       row = idxm[i] - rstart;
90       for ( j=0; j<n; j++ ) {
91         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
92         if (idxn[j] >= mdn->N) {
93           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
94         }
95         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
96       }
97     } else {
98       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
99     }
100   }
101   PetscFunctionReturn(0);
102 }
103 
104 #undef __FUNC__
105 #define __FUNC__ "MatGetArray_MPIDense"
106 int MatGetArray_MPIDense(Mat A,Scalar **array)
107 {
108   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
109   int          ierr;
110 
111   PetscFunctionBegin;
112   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
113   PetscFunctionReturn(0);
114 }
115 
116 #undef __FUNC__
117 #define __FUNC__ "MatGetSubMatrix_MPIDense"
118 static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
119 {
120   Mat_MPIDense *mat = (Mat_MPIDense *) A->data, *newmatd;
121   Mat_SeqDense *lmat = (Mat_SeqDense *) mat->A->data;
122   int          i, j, ierr, *irow, *icol, rstart, rend, nrows, ncols, nlrows, nlcols, rank;
123   Scalar       *av, *bv, *v = lmat->v;
124   Mat          newmat;
125 
126   PetscFunctionBegin;
127   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
128   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
129   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
130   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
131   ierr = ISGetSize(iscol,&ncols);CHKERRQ(ierr);
132 
133   /* No parallel redistribution is currently supported! Each index set should really be
134      checked to confirm that it is OK.  ... Currently supports only submatrices with the
135      same row partitioning as the original matrix! */
136 
137   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
138   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
139 
140   /* Check submatrix call */
141   if (scall == MAT_REUSE_MATRIX) {
142     /* SETERRQ(PETSC_ERR_ARG_SIZ,0,"Reused submatrix wrong size"); */
143     /* Really need to test the row and column sizes! */
144     newmat = *B;
145   } else {
146     /* Create and fill new matrix */
147     ierr = MatCreateMPIDense(A->comm,nrows,ncols,PETSC_DECIDE,PETSC_DECIDE,PETSC_NULL,&newmat);CHKERRQ(ierr);
148   }
149 
150   /* Now extract the data pointers and do the copy, one column at a time */
151   newmatd = (Mat_MPIDense *) newmat->data;
152   bv = ((Mat_SeqDense *)newmatd->A->data)->v;
153 
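  /* the local values are stored by columns, so column icol[i] of the original
     matrix starts at v + nlrows*icol[i] */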
154   for ( i=0; i<ncols; i++ ) {
155     av = v + nlrows*icol[i];
156     for (j=0; j<nrows; j++ ) {
157       *bv++ = av[irow[j] - rstart];
158     }
159   }
160 
161   /* Assemble the matrices so that the correct flags are set */
162   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
163   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
164 
165   /* Free work space */
166   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
167   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
168   *B = newmat;
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNC__
173 #define __FUNC__ "MatRestoreArray_MPIDense"
174 int MatRestoreArray_MPIDense(Mat A,Scalar **array)
175 {
176   PetscFunctionBegin;
177   PetscFunctionReturn(0);
178 }
179 
180 #undef __FUNC__
181 #define __FUNC__ "MatAssemblyBegin_MPIDense"
182 int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
183 {
184   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
185   MPI_Comm     comm = mat->comm;
186   int          ierr,nstash,reallocs;
187   InsertMode   addv;
188 
189   PetscFunctionBegin;
190   /* make sure all processors are in the same insert mode (INSERT_VALUES or ADD_VALUES); OR-ing the modes together detects a mix */
191   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
192   if (addv == (ADD_VALUES|INSERT_VALUES)) {
193     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
194   }
195   mat->insertmode = addv; /* in case this processor had no cache */
196 
197   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
198   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
199   PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",
200            nstash,reallocs);
201   PetscFunctionReturn(0);
202 }
203 
204 #undef __FUNC__
205 #define __FUNC__ "MatAssemblyEnd_MPIDense"
206 int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
207 {
208   Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
209   int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
210   Scalar       *val;
211   InsertMode   addv=mat->insertmode;
212 
213   PetscFunctionBegin;
214   /*  wait on receives */
215   while (1) {
216     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
217     if (!flg) break;
218 
219     for ( i=0; i<n; ) {
220       /* Now identify the consecutive vals belonging to the same row */
221       for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
222       if (j < n) ncols = j-i;
223       else       ncols = n-i;
224       /* Now assemble all these values with a single function call */
225       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
226       i = j;
227     }
228   }
229   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
230 
231   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
232   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
233 
234   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
235     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
236   }
237   PetscFunctionReturn(0);
238 }
239 
240 #undef __FUNC__
241 #define __FUNC__ "MatZeroEntries_MPIDense"
242 int MatZeroEntries_MPIDense(Mat A)
243 {
244   int          ierr;
245   Mat_MPIDense *l = (Mat_MPIDense *) A->data;
246 
247   PetscFunctionBegin;
248   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
249   PetscFunctionReturn(0);
250 }
251 
252 #undef __FUNC__
253 #define __FUNC__ "MatGetBlockSize_MPIDense"
254 int MatGetBlockSize_MPIDense(Mat A,int *bs)
255 {
256   PetscFunctionBegin;
257   *bs = 1;
258   PetscFunctionReturn(0);
259 }
260 
261 /* the code does not do the diagonal entries correctly unless the
262    matrix is square and the column and row ownerships are identical.
263    This is a BUG. The only way to fix it seems to be to access
264    the local sequential matrix (mdn->A) directly and not go through
265    the MatZeroRows() routine.
266 */
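/*
   Outline of the communication in the routine below: each processor determines the
   owner of every row it was handed, counts how many rows it must send to each owner,
   and a reduction with PetscMaxSum_Op tells every processor the length of the longest
   message it will receive (nmax) and how many messages will arrive (nrecvs).  The row
   indices are then exchanged with nonblocking sends/receives, converted to local
   numbering, and finally zeroed by calling MatZeroRows() on the local sequential matrix.
*/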
267 #undef __FUNC__
268 #define __FUNC__ "MatZeroRows_MPIDense"
269 int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
270 {
271   Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
272   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
273   int            *procs,*nprocs,j,found,idx,nsends,*work;
274   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
275   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
276   int            *lens,imdex,*lrows,*values;
277   MPI_Comm       comm = A->comm;
278   MPI_Request    *send_waits,*recv_waits;
279   MPI_Status     recv_status,*send_status;
280   IS             istmp;
281 
282   PetscFunctionBegin;
283   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
284   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
285 
286   /*  first count number of contributors to each processor */
287   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
288   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
289   procs  = nprocs + size;
290   owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
291   for ( i=0; i<N; i++ ) {
292     idx = rows[i];
293     found = 0;
294     for ( j=0; j<size; j++ ) {
295       if (idx >= owners[j] && idx < owners[j+1]) {
296         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
297       }
298     }
299     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
300   }
301   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
302 
303   /* inform other processors of the number of messages and the max length; after the reduction, work[rank] is the longest incoming message and work[size+rank] is the number of messages this processor will receive */
304   work   = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(work);
305   ierr   = MPI_Allreduce( nprocs, work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
306   nmax   = work[rank];
307   nrecvs = work[size+rank];
308   ierr   = PetscFree(work);CHKERRQ(ierr);
309 
310   /* post receives:   */
311   rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
312   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
313   for ( i=0; i<nrecvs; i++ ) {
314     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
315   }
316 
317   /* do sends:
318       1) starts[i] gives the starting index in svalues for stuff going to
319          the ith processor
320   */
321   svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
322   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
323   starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
324   starts[0]  = 0;
325   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
326   for ( i=0; i<N; i++ ) {
327     svalues[starts[owner[i]]++] = rows[i];
328   }
329   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
330 
331   starts[0] = 0;
332   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
333   count = 0;
334   for ( i=0; i<size; i++ ) {
335     if (procs[i]) {
336       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
337     }
338   }
339   ierr = PetscFree(starts);CHKERRQ(ierr);
340 
341   base = owners[rank];
342 
343   /*  wait on receives */
344   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
345   source = lens + nrecvs;
346   count  = nrecvs; slen = 0;
347   while (count) {
348     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
349     /* unpack receives into our local space */
350     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
351     source[imdex]  = recv_status.MPI_SOURCE;
352     lens[imdex]  = n;
353     slen += n;
354     count--;
355   }
356   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
357 
358   /* move the data into the send scatter */
359   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
360   count = 0;
361   for ( i=0; i<nrecvs; i++ ) {
362     values = rvalues + i*nmax;
363     for ( j=0; j<lens[i]; j++ ) {
364       lrows[count++] = values[j] - base;
365     }
366   }
367   ierr = PetscFree(rvalues);CHKERRQ(ierr);
368   ierr = PetscFree(lens);CHKERRQ(ierr);
369   ierr = PetscFree(owner);CHKERRQ(ierr);
370   ierr = PetscFree(nprocs);CHKERRQ(ierr);
371 
372   /* actually zap the local rows */
373   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
374   PLogObjectParent(A,istmp);
375   ierr = PetscFree(lrows);CHKERRQ(ierr);
376   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
377   ierr = ISDestroy(istmp);CHKERRQ(ierr);
378 
379   /* wait on sends */
380   if (nsends) {
381     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
382     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
383     ierr = PetscFree(send_status);CHKERRQ(ierr);
384   }
385   ierr = PetscFree(send_waits);CHKERRQ(ierr);
386   ierr = PetscFree(svalues);CHKERRQ(ierr);
387 
388   PetscFunctionReturn(0);
389 }
390 
391 #undef __FUNC__
392 #define __FUNC__ "MatMult_MPIDense"
393 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
394 {
395   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
396   int          ierr;
397 
398   PetscFunctionBegin;
399   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
400   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
401   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
402   PetscFunctionReturn(0);
403 }
404 
405 #undef __FUNC__
406 #define __FUNC__ "MatMultAdd_MPIDense"
407 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
408 {
409   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
410   int          ierr;
411 
412   PetscFunctionBegin;
413   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
414   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
415   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
416   PetscFunctionReturn(0);
417 }
418 
419 #undef __FUNC__
420 #define __FUNC__ "MatMultTrans_MPIDense"
421 int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
422 {
423   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
424   int          ierr;
425   Scalar       zero = 0.0;
426 
427   PetscFunctionBegin;
428   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
429   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
430   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
431   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
432   PetscFunctionReturn(0);
433 }
434 
435 #undef __FUNC__
436 #define __FUNC__ "MatMultTransAdd_MPIDense"
437 int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
438 {
439   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
440   int          ierr;
441 
442   PetscFunctionBegin;
443   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
444   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
445   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
446   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
447   PetscFunctionReturn(0);
448 }
449 
450 #undef __FUNC__
451 #define __FUNC__ "MatGetDiagonal_MPIDense"
452 int MatGetDiagonal_MPIDense(Mat A,Vec v)
453 {
454   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
455   Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
456   int          ierr, len, i, n, m = a->m, radd;
457   Scalar       *x, zero = 0.0;
458 
459   PetscFunctionBegin;
460   ierr = VecSet(&zero,v);CHKERRQ(ierr);
461   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
462   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
463   if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
464   len = PetscMin(aloc->m,aloc->n);
465   radd = a->rstart*m;
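  /* the diagonal entry of local row i lies in global column rstart+i; with the
     column-major local storage this is v[i + (rstart+i)*m] = v[radd + i*m + i] */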
466   for ( i=0; i<len; i++ ) {
467     x[i] = aloc->v[radd + i*m + i];
468   }
469   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
470   PetscFunctionReturn(0);
471 }
472 
473 #undef __FUNC__
474 #define __FUNC__ "MatDestroy_MPIDense"
475 int MatDestroy_MPIDense(Mat mat)
476 {
477   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
478   int          ierr;
479 
480   PetscFunctionBegin;
481 
482   if (mat->mapping) {
483     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
484   }
485   if (mat->bmapping) {
486     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
487   }
488 #if defined(PETSC_USE_LOG)
489   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
490 #endif
491   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
492   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
493   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
494   if (mdn->lvec)   VecDestroy(mdn->lvec);
495   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
496   if (mdn->factor) {
497     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
498     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
499     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
500     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
501   }
502   ierr = PetscFree(mdn);CHKERRQ(ierr);
503   if (mat->rmap) {
504     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
505   }
506   if (mat->cmap) {
507     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
508   }
509   PLogObjectDestroy(mat);
510   PetscHeaderDestroy(mat);
511   PetscFunctionReturn(0);
512 }
513 
514 #undef __FUNC__
515 #define __FUNC__ "MatView_MPIDense_Binary"
516 static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
517 {
518   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
519   int          ierr;
520 
521   PetscFunctionBegin;
522   if (mdn->size == 1) {
523     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
524   }
525   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
526   PetscFunctionReturn(0);
527 }
528 
529 #undef __FUNC__
530 #define __FUNC__ "MatView_MPIDense_ASCII"
531 static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
532 {
533   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
534   int          ierr, format, size = mdn->size, rank = mdn->rank;
535   FILE         *fd;
536   ViewerType   vtype;
537 
538   PetscFunctionBegin;
539   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
540   ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
541   ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
542   if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
543     MatInfo info;
544     ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
545     ierr = ViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
546          (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
547     ierr = ViewerFlush(viewer);CHKERRQ(ierr);
548     ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
549     PetscFunctionReturn(0);
550   } else if (format == VIEWER_FORMAT_ASCII_INFO) {
551     PetscFunctionReturn(0);
552   }
553 
554   if (size == 1) {
555     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
556   } else {
557     /* assemble the entire matrix onto first processor. */
558     Mat          A;
559     int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
560     Scalar       *vals;
561     Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;
562 
563     if (!rank) {
564       ierr = MatCreateMPIDense(mat->comm,M,mdn->nvec,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
565     } else {
566       ierr = MatCreateMPIDense(mat->comm,0,mdn->nvec,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
567     }
568     PLogObjectParent(mat,A);
569 
570     /* Copy the matrix ... This isn't the most efficient means,
571        but it's quick for now */
572     row = mdn->rstart; m = Amdn->m;
573     for ( i=0; i<m; i++ ) {
574       ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
575       ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
576       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
577       row++;
578     }
579 
580     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
581     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
582     if (!rank) {
583       Viewer sviewer;
584       ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
585       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
586       ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
587     }
588     ierr = ViewerFlush(viewer);CHKERRQ(ierr);
589     ierr = MatDestroy(A);CHKERRQ(ierr);
590   }
591   PetscFunctionReturn(0);
592 }
593 
594 #undef __FUNC__
595 #define __FUNC__ "MatView_MPIDense"
596 int MatView_MPIDense(Mat mat,Viewer viewer)
597 {
598   int        ierr;
599   PetscTruth isascii,isbinary;
600 
601   PetscFunctionBegin;
602 
603   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
604   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
605 
606   if (isascii) {
607     ierr = MatView_MPIDense_ASCII(mat,viewer);CHKERRQ(ierr);
608   } else if (isbinary) {
609     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
610   } else {
611     SETERRQ1(1,1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
612   }
613   PetscFunctionReturn(0);
614 }
615 
616 #undef __FUNC__
617 #define __FUNC__ "MatGetInfo_MPIDense"
618 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
619 {
620   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
621   Mat          mdn = mat->A;
622   int          ierr;
623   double       isend[5], irecv[5];
624 
625   PetscFunctionBegin;
626   info->rows_global    = (double)mat->M;
627   info->columns_global = (double)mat->N;
628   info->rows_local     = (double)mat->m;
629   info->columns_local  = (double)mat->N;
630   info->block_size     = 1.0;
631   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
632   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
633   isend[3] = info->memory;  isend[4] = info->mallocs;
634   if (flag == MAT_LOCAL) {
635     info->nz_used      = isend[0];
636     info->nz_allocated = isend[1];
637     info->nz_unneeded  = isend[2];
638     info->memory       = isend[3];
639     info->mallocs      = isend[4];
640   } else if (flag == MAT_GLOBAL_MAX) {
641     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
642     info->nz_used      = irecv[0];
643     info->nz_allocated = irecv[1];
644     info->nz_unneeded  = irecv[2];
645     info->memory       = irecv[3];
646     info->mallocs      = irecv[4];
647   } else if (flag == MAT_GLOBAL_SUM) {
648     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
649     info->nz_used      = irecv[0];
650     info->nz_allocated = irecv[1];
651     info->nz_unneeded  = irecv[2];
652     info->memory       = irecv[3];
653     info->mallocs      = irecv[4];
654   }
655   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
656   info->fill_ratio_needed = 0;
657   info->factor_mallocs    = 0;
658   PetscFunctionReturn(0);
659 }
660 
661 /* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
662    extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
663    extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
664    extern int MatSolve_MPIDense(Mat,Vec,Vec);
665    extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
666    extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
667    extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */
668 
669 #undef __FUNC__
670 #define __FUNC__ "MatSetOption_MPIDense"
671 int MatSetOption_MPIDense(Mat A,MatOption op)
672 {
673   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
674 
675   PetscFunctionBegin;
676   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
677       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
678       op == MAT_NEW_NONZERO_LOCATION_ERR ||
679       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
680       op == MAT_COLUMNS_SORTED ||
681       op == MAT_COLUMNS_UNSORTED) {
682         MatSetOption(a->A,op);
683   } else if (op == MAT_ROW_ORIENTED) {
684         a->roworiented = 1;
685         MatSetOption(a->A,op);
686   } else if (op == MAT_ROWS_SORTED ||
687              op == MAT_ROWS_UNSORTED ||
688              op == MAT_SYMMETRIC ||
689              op == MAT_STRUCTURALLY_SYMMETRIC ||
690              op == MAT_YES_NEW_DIAGONALS ||
691              op == MAT_USE_HASH_TABLE) {
692     PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
693   } else if (op == MAT_COLUMN_ORIENTED) {
694     a->roworiented = 0; MatSetOption(a->A,op);
695   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
696     a->donotstash = 1;
697   } else if (op == MAT_NO_NEW_DIAGONALS) {
698     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
699   } else {
700     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
701   }
702   PetscFunctionReturn(0);
703 }
704 
705 #undef __FUNC__
706 #define __FUNC__ "MatGetSize_MPIDense"
707 int MatGetSize_MPIDense(Mat A,int *m,int *n)
708 {
709   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
710 
711   PetscFunctionBegin;
712   *m = mat->M; *n = mat->N;
713   PetscFunctionReturn(0);
714 }
715 
716 #undef __FUNC__
717 #define __FUNC__ "MatGetLocalSize_MPIDense"
718 int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
719 {
720   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
721 
722   PetscFunctionBegin;
723   *m = mat->m; *n = mat->N;
724   PetscFunctionReturn(0);
725 }
726 
727 #undef __FUNC__
728 #define __FUNC__ "MatGetOwnershipRange_MPIDense"
729 int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
730 {
731   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
732 
733   PetscFunctionBegin;
734   *m = mat->rstart; *n = mat->rend;
735   PetscFunctionReturn(0);
736 }
737 
738 #undef __FUNC__
739 #define __FUNC__ "MatGetRow_MPIDense"
740 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
741 {
742   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
743   int          lrow, rstart = mat->rstart, rend = mat->rend,ierr;
744 
745   PetscFunctionBegin;
746   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows");
747   lrow = row - rstart;
748   ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
749   PetscFunctionReturn(0);
750 }
751 
752 #undef __FUNC__
753 #define __FUNC__ "MatRestoreRow_MPIDense"
754 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
755 {
756   int ierr;
757 
758   PetscFunctionBegin;
759   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
760   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
761   PetscFunctionReturn(0);
762 }
763 
764 #undef __FUNC__
765 #define __FUNC__ "MatDiagonalScale_MPIDense"
766 int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
767 {
768   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
769   Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
770   Scalar       *l,*r,x,*v;
771   int          ierr,i,j,s2a,s3a,s2,s3,m=mat->m,n=mat->n;
772 
773   PetscFunctionBegin;
774   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
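  /* left scaling multiplies local row i by l[i]; with column-major storage that row is
     traversed with stride m.  Right scaling first gathers rr into lvec so that every
     processor has all N entries, then scales each locally stored column. */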
775   if (ll) {
776     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
777     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Left scaling vector non-conforming local size");
778     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
779     for ( i=0; i<m; i++ ) {
780       x = l[i];
781       v = mat->v + i;
782       for ( j=0; j<n; j++ ) { (*v) *= x; v+= m;}
783     }
784     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
785     PLogFlops(n*m);
786   }
787   if (rr) {
788     ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
789     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Right scaling vec non-conforming local size");
790     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
791     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
792     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
793     for ( i=0; i<n; i++ ) {
794       x = r[i];
795       v = mat->v + i*m;
796       for ( j=0; j<m; j++ ) { (*v++) *= x;}
797     }
798     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
799     PLogFlops(n*m);
800   }
801   PetscFunctionReturn(0);
802 }
803 
804 #undef __FUNC__
805 #define __FUNC__ "MatNorm_MPIDense"
806 int MatNorm_MPIDense(Mat A,NormType type,double *norm)
807 {
808   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
809   Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
810   int          ierr, i, j;
811   double       sum = 0.0;
812   Scalar       *v = mat->v;
813 
814   PetscFunctionBegin;
815   if (mdn->size == 1) {
816     ierr =  MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
817   } else {
818     if (type == NORM_FROBENIUS) {
819       for (i=0; i<mat->n*mat->m; i++ ) {
820 #if defined(PETSC_USE_COMPLEX)
821         sum += PetscReal(PetscConj(*v)*(*v)); v++;
822 #else
823         sum += (*v)*(*v); v++;
824 #endif
825       }
826       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
827       *norm = sqrt(*norm);
828       PLogFlops(2*mat->n*mat->m);
829     } else if (type == NORM_1) {
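      /* 1-norm: accumulate the local contribution to each column sum in tmp[],
         add the contributions across processors, then take the maximum over columns */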
830       double *tmp, *tmp2;
831       tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) );CHKPTRQ(tmp);
832       tmp2 = tmp + mdn->N;
833       ierr = PetscMemzero(tmp,2*mdn->N*sizeof(double));CHKERRQ(ierr);
834       *norm = 0.0;
835       v = mat->v;
836       for ( j=0; j<mat->n; j++ ) {
837         for ( i=0; i<mat->m; i++ ) {
838           tmp[j] += PetscAbsScalar(*v);  v++;
839         }
840       }
841       ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
842       for ( j=0; j<mdn->N; j++ ) {
843         if (tmp2[j] > *norm) *norm = tmp2[j];
844       }
845       ierr = PetscFree(tmp);CHKERRQ(ierr);
846       PLogFlops(mat->n*mat->m);
847     } else if (type == NORM_INFINITY) { /* max row norm */
848       double ntemp;
849       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
850       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
851     } else {
852       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
853     }
854   }
855   PetscFunctionReturn(0);
856 }
857 
858 #undef __FUNC__
859 #define __FUNC__ "MatTranspose_MPIDense"
860 int MatTranspose_MPIDense(Mat A,Mat *matout)
861 {
862   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
863   Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
864   Mat          B;
865   int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
866   int          j, i, ierr;
867   Scalar       *v;
868 
869   PetscFunctionBegin;
870   if (matout == PETSC_NULL && M != N) {
871     SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place");
872   }
873   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);
874 
875   m = Aloc->m; n = Aloc->n; v = Aloc->v;
876   rwork = (int *) PetscMalloc((m+1)*sizeof(int));CHKPTRQ(rwork);
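  /* rwork[] holds the global indices rstart..rstart+m-1 of this processor's rows of A;
     they become the column indices of row j (column j of A) in the transpose B */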
877   for ( j=0; j<n; j++ ) {
878     for (i=0; i<m; i++) rwork[i] = rstart + i;
879     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
880     v   += m;
881   }
882   ierr = PetscFree(rwork);CHKERRQ(ierr);
883   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
884   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
885   if (matout != PETSC_NULL) {
886     *matout = B;
887   } else {
888     PetscOps *Abops;
889     MatOps   Aops;
890 
891     /* This isn't really an in-place transpose: free the data structures in a, then copy B's contents into A's header below */
892     ierr = PetscFree(a->rowners);CHKERRQ(ierr);
893     ierr = MatDestroy(a->A);CHKERRQ(ierr);
894     if (a->lvec) VecDestroy(a->lvec);
895     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
896     ierr = PetscFree(a);CHKERRQ(ierr);
897 
898     /*
899          This is horrible, horrible code. We need to keep the
900       A pointers for the bops and ops but copy everything
901       else from B.
902     */
903     Abops   = A->bops;
904     Aops    = A->ops;
905     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
906     A->bops = Abops;
907     A->ops  = Aops;
908 
909     PetscHeaderDestroy(B);
910   }
911   PetscFunctionReturn(0);
912 }
913 
914 #include "pinclude/blaslapack.h"
915 #undef __FUNC__
916 #define __FUNC__ "MatScale_MPIDense"
917 int MatScale_MPIDense(Scalar *alpha,Mat inA)
918 {
919   Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
920   Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
921   int          one = 1, nz;
922 
923   PetscFunctionBegin;
924   nz = a->m*a->n;
925   BLscal_( &nz, alpha, a->v, &one );
926   PLogFlops(nz);
927   PetscFunctionReturn(0);
928 }
929 
930 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
931 extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);
932 
933 /* -------------------------------------------------------------------*/
934 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
935        MatGetRow_MPIDense,
936        MatRestoreRow_MPIDense,
937        MatMult_MPIDense,
938        MatMultAdd_MPIDense,
939        MatMultTrans_MPIDense,
940        MatMultTransAdd_MPIDense,
941        0,
942        0,
943        0,
944        0,
945        0,
946        0,
947        0,
948        MatTranspose_MPIDense,
949        MatGetInfo_MPIDense,0,
950        MatGetDiagonal_MPIDense,
951        MatDiagonalScale_MPIDense,
952        MatNorm_MPIDense,
953        MatAssemblyBegin_MPIDense,
954        MatAssemblyEnd_MPIDense,
955        0,
956        MatSetOption_MPIDense,
957        MatZeroEntries_MPIDense,
958        MatZeroRows_MPIDense,
959        0,
960        0,
961        0,
962        0,
963        MatGetSize_MPIDense,
964        MatGetLocalSize_MPIDense,
965        MatGetOwnershipRange_MPIDense,
966        0,
967        0,
968        MatGetArray_MPIDense,
969        MatRestoreArray_MPIDense,
970        MatDuplicate_MPIDense,
971        0,
972        0,
973        0,
974        0,
975        0,
976        MatGetSubMatrices_MPIDense,
977        0,
978        MatGetValues_MPIDense,
979        0,
980        0,
981        MatScale_MPIDense,
982        0,
983        0,
984        0,
985        MatGetBlockSize_MPIDense,
986        0,
987        0,
988        0,
989        0,
990        0,
991        0,
992        0,
993        0,
994        0,
995        MatGetSubMatrix_MPIDense,
996        0,
997        0,
998        MatGetMaps_Petsc};
999 
1000 #undef __FUNC__
1001 #define __FUNC__ "MatCreateMPIDense"
1002 /*@C
1003    MatCreateMPIDense - Creates a parallel matrix in dense format.
1004 
1005    Collective on MPI_Comm
1006 
1007    Input Parameters:
1008 +  comm - MPI communicator
1009 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1010 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1011 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1012 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1013 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1014    to control all matrix memory allocation.
1015 
1016    Output Parameter:
1017 .  A - the matrix
1018 
1019    Notes:
1020    The dense format is fully compatible with standard Fortran 77
1021    storage by columns.
1022 
1023    The data input variable is intended primarily for Fortran programmers
1024    who wish to allocate their own matrix memory space.  Most users should
1025    set data=PETSC_NULL.
1026 
1027    The user MUST specify either the local or global matrix dimensions
1028    (possibly both).
1029 
1030    Level: intermediate
1031 
1032 .keywords: matrix, dense, parallel
1033 
1034 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1035 @*/
1036 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
1037 {
1038   Mat          mat;
1039   Mat_MPIDense *a;
1040   int          ierr, i,flg;
1041 
1042   PetscFunctionBegin;
1043   /* Note:  For now, when data is specified above, this assumes the user correctly
1044    allocates the local dense storage space.  We should add error checking. */
1045 
1046   *A = 0;
1047   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView);
1048   PLogObjectCreate(mat);
1049   mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
1050   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1051   mat->ops->destroy = MatDestroy_MPIDense;
1052   mat->ops->view    = MatView_MPIDense;
1053   mat->factor       = 0;
1054   mat->mapping      = 0;
1055 
1056   a->factor       = 0;
1057   mat->insertmode = NOT_SET_VALUES;
1058   ierr = MPI_Comm_rank(comm,&a->rank);CHKERRQ(ierr);
1059   ierr = MPI_Comm_size(comm,&a->size);CHKERRQ(ierr);
1060 
1061   ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
1062 
1063   ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1064   a->nvec = n;
1065 
1066   /* each row stores all columns */
1067   a->N = mat->N = N;
1068   a->M = mat->M = M;
1069   a->m = mat->m = m;
1070   a->n = mat->n = N;   /* NOTE: n == N */
1071 
1072   /* the information in the maps duplicates the information computed below, eventually
1073      we should remove the duplicate information that is not contained in the maps */
1074   ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr);
1075   ierr = MapCreateMPI(comm,n,N,&mat->cmap);CHKERRQ(ierr);
1076 
1077   /* build local table of row and column ownerships */
1078   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
1079   a->cowners = a->rowners + a->size + 1;
1080   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1081   ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1082   a->rowners[0] = 0;
1083   for ( i=2; i<=a->size; i++ ) {
1084     a->rowners[i] += a->rowners[i-1];
1085   }
1086   a->rstart = a->rowners[a->rank];
1087   a->rend   = a->rowners[a->rank+1];
1088   ierr      = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1089   a->cowners[0] = 0;
1090   for ( i=2; i<=a->size; i++ ) {
1091     a->cowners[i] += a->cowners[i-1];
1092   }
1093 
1094   ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A);CHKERRQ(ierr);
1095   PLogObjectParent(mat,a->A);
1096 
1097   /* build cache (stash) for off-processor entries */
1098   a->donotstash = 0;
1099   ierr = MatStashCreate_Private(comm,1,&mat->stash);CHKERRQ(ierr);
1100 
1101   /* stuff used for matrix vector multiply */
1102   a->lvec        = 0;
1103   a->Mvctx       = 0;
1104   a->roworiented = 1;
1105 
1106   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",
1107                                      "MatGetDiagonalBlock_MPIDense",
1108                                      (void*)MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1109 
1110   *A = mat;
1111   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
1112   if (flg) {
1113     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
1114   }
1115   PetscFunctionReturn(0);
1116 }
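
/*
   Minimal usage sketch (illustration only, not part of this source file).  The
   communicator, matrix sizes, and entries below are placeholders chosen for the example.

       Mat    C;
       Scalar one = 1.0;
       int    ierr,row = 0,col = 0,M = 10,N = 10;

       ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&C);CHKERRQ(ierr);
       ierr = MatSetValues(C,1,&row,1,&col,&one,INSERT_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatDestroy(C);CHKERRQ(ierr);

   Note that the local array obtained with MatGetArray() is stored by columns (Fortran
   ordering): the entry in global row i and global column j of this processor's block
   is array[(i - rstart) + j*m], where m is the number of local rows and rstart the
   first locally owned row.
*/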
1117 
1118 #undef __FUNC__
1119 #define __FUNC__ "MatDuplicate_MPIDense"
1120 static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1121 {
1122   Mat          mat;
1123   Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
1124   int          ierr;
1125   FactorCtx    *factor;
1126 
1127   PetscFunctionBegin;
1128   *newmat       = 0;
1129   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView);
1130   PLogObjectCreate(mat);
1131   mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
1132   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1133   mat->ops->destroy = MatDestroy_MPIDense;
1134   mat->ops->view    = MatView_MPIDense;
1135   mat->factor       = A->factor;
1136   mat->assembled    = PETSC_TRUE;
1137 
1138   a->m = mat->m = oldmat->m;
1139   a->n = mat->n = oldmat->n;
1140   a->M = mat->M = oldmat->M;
1141   a->N = mat->N = oldmat->N;
1142   if (oldmat->factor) {
1143     a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx));CHKPTRQ(factor);
1144     /* copy factor contents ... add this code! */
1145   } else a->factor = 0;
1146 
1147   a->rstart       = oldmat->rstart;
1148   a->rend         = oldmat->rend;
1149   a->size         = oldmat->size;
1150   a->rank         = oldmat->rank;
1151   mat->insertmode = NOT_SET_VALUES;
1152   a->donotstash   = oldmat->donotstash;
1153   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners);
1154   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1155   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
1156   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1157 
1158   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1159   PLogObjectParent(mat,a->lvec);
1160   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1161   PLogObjectParent(mat,a->Mvctx);
1162   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1163   PLogObjectParent(mat,a->A);
1164   *newmat = mat;
1165   PetscFunctionReturn(0);
1166 }
1167 
1168 #include "sys.h"
1169 
1170 #undef __FUNC__
1171 #define __FUNC__ "MatLoad_MPIDense_DenseInFile"
1172 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
1173 {
1174   int        *rowners, i,size,rank,m,ierr,nz,j;
1175   Scalar     *array,*vals,*vals_ptr;
1176   MPI_Status status;
1177 
1178   PetscFunctionBegin;
1179   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1180   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1181 
1182   /* determine ownership of all rows */
1183   m          = M/size + ((M % size) > rank);
1184   rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1185   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1186   rowners[0] = 0;
1187   for ( i=2; i<=size; i++ ) {
1188     rowners[i] += rowners[i-1];
1189   }
1190 
1191   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1192   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1193 
1194   if (!rank) {
1195     vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);
1196 
1197     /* read in my part of the matrix numerical values  */
1198     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1199 
1200     /* insert into the matrix row by row (this is why we cannot read directly into array) */
1201     vals_ptr = vals;
1202     for ( i=0; i<m; i++ ) {
1203       for ( j=0; j<N; j++ ) {
1204         array[i + j*m] = *vals_ptr++;
1205       }
1206     }
1207 
1208     /* read in the other processors' parts and ship them out */
1209     for ( i=1; i<size; i++ ) {
1210       nz   = (rowners[i+1] - rowners[i])*N;
1211       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1212       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1213     }
1214   } else {
1215     /* receive numeric values */
1216     vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);
1217 
1218     /* receive message of values*/
1219     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1220 
1221     /* insert into the matrix row by row (this is why we cannot read directly into array) */
1222     vals_ptr = vals;
1223     for ( i=0; i<m; i++ ) {
1224       for ( j=0; j<N; j++ ) {
1225         array[i + j*m] = *vals_ptr++;
1226       }
1227     }
1228   }
1229   ierr = PetscFree(rowners);CHKERRQ(ierr);
1230   ierr = PetscFree(vals);CHKERRQ(ierr);
1231   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1232   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 
1237 #undef __FUNC__
1238 #define __FUNC__ "MatLoad_MPIDense"
1239 int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
1240 {
1241   Mat          A;
1242   Scalar       *vals,*svals;
1243   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1244   MPI_Status   status;
1245   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1246   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1247   int          tag = ((PetscObject)viewer)->tag;
1248   int          i, nz, ierr, j,rstart, rend, fd;
1249 
1250   PetscFunctionBegin;
1251   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1252   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1253   if (!rank) {
1254     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1255     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1256     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1257   }
1258 
1259   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1260   M = header[1]; N = header[2]; nz = header[3];
1261 
1262   /*
1263        Handle case where matrix is stored on disk as a dense matrix
1264   */
1265   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1266     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1267     PetscFunctionReturn(0);
1268   }
1269 
1270   /* determine ownership of all rows */
1271   m          = M/size + ((M % size) > rank);
1272   rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1273   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1274   rowners[0] = 0;
1275   for ( i=2; i<=size; i++ ) {
1276     rowners[i] += rowners[i-1];
1277   }
1278   rstart = rowners[rank];
1279   rend   = rowners[rank+1];
1280 
1281   /* distribute row lengths to all processors */
1282   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
1283   offlens = ourlens + (rend-rstart);
1284   if (!rank) {
1285     rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
1286     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1287     sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
1288     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1289     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1290     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1291   } else {
1292     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
1293   }
1294 
1295   if (!rank) {
1296     /* calculate the number of nonzeros on each processor */
1297     procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
1298     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1299     for ( i=0; i<size; i++ ) {
1300       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1301         procsnz[i] += rowlengths[j];
1302       }
1303     }
1304     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1305 
1306     /* determine max buffer needed and allocate it */
1307     maxnz = 0;
1308     for ( i=0; i<size; i++ ) {
1309       maxnz = PetscMax(maxnz,procsnz[i]);
1310     }
1311     cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
1312 
1313     /* read in my part of the matrix column indices  */
1314     nz = procsnz[0];
1315     mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
1316     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1317 
1318     /* read in everyone else's part and ship it off */
1319     for ( i=1; i<size; i++ ) {
1320       nz   = procsnz[i];
1321       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1322       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1323     }
1324     ierr = PetscFree(cols);CHKERRQ(ierr);
1325   } else {
1326     /* determine buffer space needed for message */
1327     nz = 0;
1328     for ( i=0; i<m; i++ ) {
1329       nz += ourlens[i];
1330     }
1331     mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
1332 
1333     /* receive message of column indices*/
1334     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1335     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1336     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1337   }
1338 
1339   /* loop over local rows, determining number of off diagonal entries */
1340   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1341   jj = 0;
1342   for ( i=0; i<m; i++ ) {
1343     for ( j=0; j<ourlens[i]; j++ ) {
1344       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1345       jj++;
1346     }
1347   }
1348 
1349   /* create our matrix */
1350   for ( i=0; i<m; i++ ) {
1351     ourlens[i] -= offlens[i];
1352   }
1353   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1354   A = *newmat;
1355   for ( i=0; i<m; i++ ) {
1356     ourlens[i] += offlens[i];
1357   }
1358 
1359   if (!rank) {
1360     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);
1361 
1362     /* read in my part of the matrix numerical values  */
1363     nz = procsnz[0];
1364     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1365 
1366     /* insert into matrix */
1367     jj      = rstart;
1368     smycols = mycols;
1369     svals   = vals;
1370     for ( i=0; i<m; i++ ) {
1371       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1372       smycols += ourlens[i];
1373       svals   += ourlens[i];
1374       jj++;
1375     }
1376 
1377     /* read in the other processors' parts and ship them out */
1378     for ( i=1; i<size; i++ ) {
1379       nz   = procsnz[i];
1380       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1381       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1382     }
1383     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1384   } else {
1385     /* receive numeric values */
1386     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);
1387 
1388     /* receive message of values*/
1389     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1390     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1391     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1392 
1393     /* insert into matrix */
1394     jj      = rstart;
1395     smycols = mycols;
1396     svals   = vals;
1397     for ( i=0; i<m; i++ ) {
1398       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1399       smycols += ourlens[i];
1400       svals   += ourlens[i];
1401       jj++;
1402     }
1403   }
1404   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1405   ierr = PetscFree(vals);CHKERRQ(ierr);
1406   ierr = PetscFree(mycols);CHKERRQ(ierr);
1407   ierr = PetscFree(rowners);CHKERRQ(ierr);
1408 
1409   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1410   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1411   PetscFunctionReturn(0);
1412 }
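
/*
   Loading sketch (illustration only): given a binary viewer opened elsewhere on a file
   that was written with MatView(), the parallel dense matrix can be recreated with

       Mat A;
       int ierr;
       ierr = MatLoad(viewer,MATMPIDENSE,&A);CHKERRQ(ierr);

   where `viewer' is assumed to have been created with this PETSc version's binary
   viewer routines.
*/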
1413 