/*$Id: mpidense.c,v 1.130 1999/10/24 14:02:12 bsmith Exp bsmith $*/

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatGetDiagonalBlock_MPIDense"
int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  int          m = mdn->m,rstart = mdn->rstart,rank,ierr;
  Scalar       *array;
  MPI_Comm     comm;

  PetscFunctionBegin;
  if (mdn->M != mdn->N) SETERRQ(PETSC_ERR_SUP,0,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

extern int MatSetUpMultiply_MPIDense(Mat);

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
  int          roworiented = A->roworiented;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for ( j=0; j<n; j++ ) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if ( !A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
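
/*
   Illustrative usage sketch (not part of this file's API): with the default
   row-oriented ordering, inserting a logically 2x2 block reads v across rows,
   so v[0],v[1] land in row rows[0] and v[2],v[3] in row rows[1]; after
   MatSetOption(mat,MAT_COLUMN_ORIENTED) the same v is consumed down columns
   instead (see the v+i+j*m offset above).

      int    rows[2] = {0,1}, cols[2] = {0,1};
      Scalar v[4]    = {1.0,2.0,3.0,4.0};
      ierr = MatSetValues(mat,2,rows,2,cols,v,INSERT_VALUES);CHKERRQ(ierr);
*/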

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= mdn->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIDense"
static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data, *newmatd;
  Mat_SeqDense *lmat = (Mat_SeqDense *) mat->A->data;
  int          i, j, ierr, *irow, *icol, rstart, rend, nrows, ncols, nlrows, nlcols, rank;
  Scalar       *av, *bv, *v = lmat->v;
  Mat          newmat;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution is currently supported! Should really check each
     index set to confirm that it is OK. ... Currently supports only a submatrix
     with the same partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,0,"Reused submatrix wrong size"); */
    /* Really need to test row and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreateMPIDense(A->comm,nrows,ncols,PETSC_DECIDE,PETSC_DECIDE,PETSC_NULL,&newmat);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, one column at a time */
  newmatd = (Mat_MPIDense *) newmat->data;
  bv = ((Mat_SeqDense *)newmatd->A->data)->v;

  for ( i=0; i<ncols; i++ ) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++ ) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERT or ADD mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",
           nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  Scalar       *val;
  InsertMode   addv=mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for ( i=0; i<n; ) {
      /* Now identify the consecutive vals belonging to the same row */
      for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   the local matrix mdn->A directly and not go through the
   MatZeroRows() routine.
*/
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work   = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(work);
  ierr   = MPI_Allreduce( nprocs, work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  nrecvs = work[size+rank];
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives:   */
  rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
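  /* Worked example of the packing below: with size = 3 and nprocs = {2,0,3},
     the prefix sums give starts = {0,2,2}, so the rows bound for processor 0
     occupy svalues[0..1] and the rows bound for processor 2 occupy
     svalues[2..4]. */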
  svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* copy the received row indices into a local list */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
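
/*
   Note on the parallel product above: each process stores an (m x N) row
   block of A, so once the scatter has gathered all N entries of x into the
   local work vector lvec, computing the locally owned rows of y = A*x is a
   purely sequential dense multiply with no further communication.
*/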

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIDense"
int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;
  Scalar       zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIDense"
int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
  int          ierr, len, i, n, m = a->m, radd;
  Scalar       *x, zero = 0.0;

  PetscFunctionBegin;
  VecSet(&zero,v);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
  len = PetscMin(aloc->m,aloc->n);
  radd = a->rstart*m;
  for ( i=0; i<len; i++ ) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
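
/*
   Layout reminder for the indexing above: the local block is stored
   column-major, so local entry (i,j) lives at aloc->v[i + j*m].  The global
   diagonal entry for local row i sits in global column rstart+i, i.e. at
   v[i + (rstart+i)*m] = v[radd + i*m + i] with radd = rstart*m.
*/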

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)   VecDestroy(mdn->lvec);
  if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_ASCIIorDraworSocket"
static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format, size = mdn->size, rank = mdn->rank;
  ViewerType   vtype;
  PetscTruth   isascii,isdraw;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
                   (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      ierr = ViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    Draw       draw;
    PetscTruth isnull;

    ierr = ViewerDrawGetDraw(viewer, 0, &draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw, &isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
    Scalar       *vals;
    Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = Amdn->m;
    for ( i=0; i<m; i++ ) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (!rank) {
      Viewer sviewer;
      ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
      ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    }
    ierr = ViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,Viewer viewer)
{
  int        ierr;
  PetscTruth isascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);

  if (isascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  Mat          mdn = mat->A;
  int          ierr;
  double       isend[5], irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
   extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
   extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
   extern int MatSolve_MPIDense(Mat,Vec,Vec);
   extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
   extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
   extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_NEW_NONZERO_LOCATION_ERR ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_COLUMNS_UNSORTED) {
    MatSetOption(a->A,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS ||
             op == MAT_USE_HASH_TABLE) {
    PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0; MatSetOption(a->A,op);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIDense"
int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIDense"
int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIDense"
int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->rstart; *n = mat->rend;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  int          lrow, rstart = mat->rstart, rend = mat->rend,ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  int ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIDense"
int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  Scalar       *l,*r,x,*v;
  int          ierr,i,j,s2a,s3a,s2,s3,m=mat->m,n=mat->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Left scaling vector non-conforming local size");
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for ( i=0; i<m; i++ ) {
      x = l[i];
      v = mat->v + i;
      for ( j=0; j<n; j++ ) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    PLogFlops(n*m);
  }
  if (rr) {
    ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Right scaling vec non-conforming local size");
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for ( i=0; i<n; i++ ) {
      x = r[i];
      v = mat->v + i*m;
      for ( j=0; j<m; j++ ) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    PLogFlops(n*m);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr, i, j;
  double       sum = 0.0;
  Scalar       *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr =  MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mat->n*mat->m; i++ ) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    } else if (type == NORM_1) {
      double *tmp, *tmp2;
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) );CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      ierr = PetscMemzero(tmp,2*mdn->N*sizeof(double));CHKERRQ(ierr);
      *norm = 0.0;
      v = mat->v;
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      PLogFlops(mat->n*mat->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for the two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
  Mat          B;
  int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
  int          j, i, ierr;
  Scalar       *v;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_SUP,0,"In-place transpose supported only for square matrices");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = Aloc->m; n = Aloc->n; v = Aloc->v;
  rwork = (int *) PetscMalloc(m*sizeof(int));CHKPTRQ(rwork);
  for ( j=0; j<n; j++ ) {
    for (i=0; i<m; i++) rwork[i] = rstart + i;
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps *Abops;
    MatOps   Aops;

    /* This isn't really an in-place transpose; we just free the data structures of a */
    ierr = PetscFree(a->rowners);CHKERRQ(ierr);
    ierr = MatDestroy(a->A);CHKERRQ(ierr);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    ierr = PetscFree(a);CHKERRQ(ierr);

    /*
         This is horrible, horrible code. We need to keep the
      A pointers for the bops and ops but copy everything
      else from B.
    */
    Abops   = A->bops;
    Aops    = A->ops;
    ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
    A->bops = Abops;
    A->ops  = Aops;

    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#include "pinclude/blaslapack.h"
#undef __FUNC__
#define __FUNC__ "MatScale_MPIDense"
int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
  Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
  int          one = 1, nz;

  PetscFunctionBegin;
  nz = a->m*a->n;
  BLscal_( &nz, alpha, a->v, &one );
  PLogFlops(nz);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
       MatMultAdd_MPIDense,
       MatMultTrans_MPIDense,
       MatMultTransAdd_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
       MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
       MatGetSize_MPIDense,
       MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
       MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
       0,
       MatScale_MPIDense,
       0,
       0,
       0,
       MatGetBlockSize_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrix_MPIDense,
       0,
       0,
       MatGetMaps_Petsc};

#undef __FUNC__
#define __FUNC__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i;
  PetscTruth   flg;

  PetscFunctionBegin;
  /* Note:  For now, when data is specified above, this assumes the user correctly
   allocates the local dense storage space.  We should add error checking. */

  *A = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = 0;
  mat->mapping      = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  a->nvec = n;

  /* each row stores all columns */
  a->N = mat->N = N;
  a->M = mat->M = M;
  a->m = mat->m = m;
  a->n = mat->n = N;   /* NOTE: n == N */

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr      = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }

  ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off-processor entries */
  a->donotstash = 0;
  ierr = MatStashCreate_Private(comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix-vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = 1;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIDense",
                                     (void*)MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);

  *A = mat;
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
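
/*
   Usage sketch (illustrative only; surrounding error handling abbreviated):
   create a 100 x 100 parallel dense matrix, letting PETSc choose the row
   distribution and allocate the storage.

      Mat A;
      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
                               100,100,PETSC_NULL,&A);CHKERRQ(ierr);
      ... fill with MatSetValues(), then ...
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/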

#undef __FUNC__
#define __FUNC__ "MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
  int          ierr;
  FactorCtx    *factor;

  PetscFunctionBegin;
  *newmat       = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx));CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;
  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
{
  int        *rowners, i,size,rank,m,ierr,nz,j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix by rows (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors and ship out */
    for ( i=1; i<size; i++ ) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into matrix by rows (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
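
/*
   Worked example for the routine above: with m = 2 local rows and N = 3, the
   file stores the rows contiguously as {a00,a01,a02,a10,a11,a12}, while the
   local dense array is column-major, so the copy loop lays them out as
   array = {a00,a10,a01,a11,a02,a12} via the array[i + j*m] indexing.
*/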

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense"
int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;
  int          i, nz, ierr, j,rstart, rend, fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not a matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
    ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's and ship out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
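
/*
   On-disk layout consumed above (as read by this routine): an int header
   {MAT_COOKIE, M, N, nz}; if nz == MATRIX_BINARY_FORMAT_DENSE the numerical
   values follow directly, stored by rows; otherwise the M row lengths, then
   all column indices, then all numerical values follow in sparse form.
*/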