xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision c38d4ed214221df9ea04de46f7761bef149d00ff)
1 /*$Id: mpidense.c,v 1.131 1999/11/05 14:45:15 bsmith Exp bsmith $*/
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 #include "src/mat/impls/dense/mpi/mpidense.h"
8 #include "src/vec/vecimpl.h"
9 
10 EXTERN_C_BEGIN
11 #undef __FUNC__
12 #define __FUNC__ "MatGetDiagonalBlock_MPIDense"
13 int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
14 {
15   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
16   int          m = mdn->m,rstart = mdn->rstart,rank,ierr;
17   Scalar       *array;
18   MPI_Comm     comm;
19 
20   PetscFunctionBegin;
21   if (mdn->M != mdn->N) SETERRQ(PETSC_ERR_SUP,0,"Only square matrices supported.");
22 
23   /* The reuse aspect is not implemented efficiently */
24   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
25 
26   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
27   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
28   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
29   ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
30   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
31   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
32   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
33 
34   *iscopy = PETSC_TRUE;
35   PetscFunctionReturn(0);
36 }
37 EXTERN_C_END
38 
39 extern int MatSetUpMultiply_MPIDense(Mat);
40 
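/*
   MatSetValues_MPIDense: entries whose rows are owned by this processor are passed
   directly to the local sequential dense matrix; entries for rows owned by other
   processors are placed in the matrix stash (unless MAT_IGNORE_OFF_PROC_ENTRIES was
   set) and are communicated later, during MatAssemblyBegin()/MatAssemblyEnd().
*/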
41 #undef __FUNC__
42 #define __FUNC__ "MatSetValues_MPIDense"
43 int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
44 {
45   Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
46   int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
47   int          roworiented = A->roworiented;
48 
49   PetscFunctionBegin;
50   for ( i=0; i<m; i++ ) {
51     if (idxm[i] < 0) continue;
52     if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
53     if (idxm[i] >= rstart && idxm[i] < rend) {
54       row = idxm[i] - rstart;
55       if (roworiented) {
56         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
57       } else {
58         for ( j=0; j<n; j++ ) {
59           if (idxn[j] < 0) continue;
60           if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
61           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
62         }
63       }
64     } else {
65       if ( !A->donotstash) {
66         if (roworiented) {
67           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
68         } else {
69           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
70         }
71       }
72     }
73   }
74   PetscFunctionReturn(0);
75 }
76 
77 #undef __FUNC__
78 #define __FUNC__ "MatGetValues_MPIDense"
79 int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
80 {
81   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
82   int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;
83 
84   PetscFunctionBegin;
85   for ( i=0; i<m; i++ ) {
86     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
87     if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
88     if (idxm[i] >= rstart && idxm[i] < rend) {
89       row = idxm[i] - rstart;
90       for ( j=0; j<n; j++ ) {
91         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
92         if (idxn[j] >= mdn->N) {
93           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
94         }
95         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
96       }
97     } else {
98       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
99     }
100   }
101   PetscFunctionReturn(0);
102 }
103 
104 #undef __FUNC__
105 #define __FUNC__ "MatGetArray_MPIDense"
106 int MatGetArray_MPIDense(Mat A,Scalar **array)
107 {
108   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
109   int          ierr;
110 
111   PetscFunctionBegin;
112   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
113   PetscFunctionReturn(0);
114 }
115 
116 #undef __FUNC__
117 #define __FUNC__ "MatGetSubMatrix_MPIDense"
118 static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
119 {
120   Mat_MPIDense *mat = (Mat_MPIDense *) A->data, *newmatd;
121   Mat_SeqDense *lmat = (Mat_SeqDense *) mat->A->data;
122   int          i, j, ierr, *irow, *icol, rstart, rend, nrows, ncols, nlrows, nlcols, rank;
123   Scalar       *av, *bv, *v = lmat->v;
124   Mat          newmat;
125 
126   PetscFunctionBegin;
127   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
128   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
129   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
130   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
131   ierr = ISGetSize(iscol,&ncols);CHKERRQ(ierr);
132 
133   /* No parallel redistribution is currently supported! We should really check each index set
134      to confirm that it is OK.  ... Currently only submatrices with the same partitioning as the
135      original matrix are supported! */
136 
137   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
138   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
139 
140   /* Check submatrix call */
141   if (scall == MAT_REUSE_MATRIX) {
142     /* SETERRQ(PETSC_ERR_ARG_SIZ,0,"Reused submatrix wrong size"); */
143     /* Really need to test the row and column sizes! */
144     newmat = *B;
145   } else {
146     /* Create and fill new matrix */
147     ierr = MatCreateMPIDense(A->comm,nrows,ncols,PETSC_DECIDE,PETSC_DECIDE,PETSC_NULL,&newmat);CHKERRQ(ierr);
148   }
149 
150   /* Now extract the data pointers and do the copy, column at a time */
151   newmatd = (Mat_MPIDense *) newmat->data;
152   bv = ((Mat_SeqDense *)newmatd->A->data)->v;
153 
154   for ( i=0; i<ncols; i++ ) {
155     av = v + nlrows*icol[i];
156     for (j=0; j<nrows; j++ ) {
157       *bv++ = av[irow[j] - rstart];
158     }
159   }
160 
161   /* Assemble the matrices so that the correct flags are set */
162   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
163   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
164 
165   /* Free work space */
166   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
167   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
168   *B = newmat;
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNC__
173 #define __FUNC__ "MatRestoreArray_MPIDense"
174 int MatRestoreArray_MPIDense(Mat A,Scalar **array)
175 {
176   PetscFunctionBegin;
177   PetscFunctionReturn(0);
178 }
179 
180 #undef __FUNC__
181 #define __FUNC__ "MatAssemblyBegin_MPIDense"
182 int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
183 {
184   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
185   MPI_Comm     comm = mat->comm;
186   int          ierr,nstash,reallocs;
187   InsertMode   addv;
188 
189   PetscFunctionBegin;
190   /* make sure all processors are using the same insert mode, either INSERT_VALUES or ADD_VALUES; mixing the two shows up below as the bitwise OR ADD_VALUES|INSERT_VALUES */
191   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
192   if (addv == (ADD_VALUES|INSERT_VALUES)) {
193     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
194   }
195   mat->insertmode = addv; /* in case this processor had no cache */
196 
197   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
198   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
199   PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
200   PetscFunctionReturn(0);
201 }
202 
203 #undef __FUNC__
204 #define __FUNC__ "MatAssemblyEnd_MPIDense"
205 int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
206 {
207   Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
208   int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
209   Scalar       *val;
210   InsertMode   addv=mat->insertmode;
211 
212   PetscFunctionBegin;
213   /*  wait on receives */
214   while (1) {
215     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
216     if (!flg) break;
217 
218     for ( i=0; i<n; ) {
219       /* Now identify the consecutive vals belonging to the same row */
220       for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
221       if (j < n) ncols = j-i;
222       else       ncols = n-i;
223       /* Now assemble all these values with a single function call */
224       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
225       i = j;
226     }
227   }
228   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
229 
230   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
231   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
232 
233   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
234     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
235   }
236   PetscFunctionReturn(0);
237 }
238 
239 #undef __FUNC__
240 #define __FUNC__ "MatZeroEntries_MPIDense"
241 int MatZeroEntries_MPIDense(Mat A)
242 {
243   int          ierr;
244   Mat_MPIDense *l = (Mat_MPIDense *) A->data;
245 
246   PetscFunctionBegin;
247   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
248   PetscFunctionReturn(0);
249 }
250 
251 #undef __FUNC__
252 #define __FUNC__ "MatGetBlockSize_MPIDense"
253 int MatGetBlockSize_MPIDense(Mat A,int *bs)
254 {
255   PetscFunctionBegin;
256   *bs = 1;
257   PetscFunctionReturn(0);
258 }
259 
260 /* the code does not do the diagonal entries correctly unless the
261    matrix is square and the column and row ownerships are identical.
262    This is a BUG. The only way to fix it seems to be to access
263    the local matrix mdn->A directly and not go through the MatZeroRows()
264    routine.
265 */
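/*
   The rows to be zeroed may be listed on any processor, not only on their owner.
   The routine below therefore first determines the owning processor of every
   listed row, exchanges the row indices with nonblocking MPI sends and receives
   (the message counts are obtained with an MPI_Allreduce using PetscMaxSum_Op),
   and finally zeros the locally owned rows by calling MatZeroRows() on the
   sequential matrix l->A.
*/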
266 #undef __FUNC__
267 #define __FUNC__ "MatZeroRows_MPIDense"
268 int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
269 {
270   Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
271   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
272   int            *procs,*nprocs,j,found,idx,nsends,*work;
273   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
274   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
275   int            *lens,imdex,*lrows,*values;
276   MPI_Comm       comm = A->comm;
277   MPI_Request    *send_waits,*recv_waits;
278   MPI_Status     recv_status,*send_status;
279   IS             istmp;
280 
281   PetscFunctionBegin;
282   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
283   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
284 
285   /*  first count number of contributors to each processor */
286   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
287   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
288   procs  = nprocs + size;
289   owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
290   for ( i=0; i<N; i++ ) {
291     idx = rows[i];
292     found = 0;
293     for ( j=0; j<size; j++ ) {
294       if (idx >= owners[j] && idx < owners[j+1]) {
295         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
296       }
297     }
298     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
299   }
300   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
301 
302   /* inform other processors of number of messages and max length*/
303   work   = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(work);
304   ierr   = MPI_Allreduce( nprocs, work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
305   nmax   = work[rank];
306   nrecvs = work[size+rank];
307   ierr   = PetscFree(work);CHKERRQ(ierr);
308 
309   /* post receives:   */
310   rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
311   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
312   for ( i=0; i<nrecvs; i++ ) {
313     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
314   }
315 
316   /* do sends:
317       1) starts[i] gives the starting index in svalues for stuff going to
318          the ith processor
319   */
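  /* For example (illustrative numbers only): with size = 3 and nprocs = {2,0,3},
     the loop below produces starts = {0,2,2}, so the two rows destined for
     processor 0 are packed into svalues[0..1] and the three rows destined for
     processor 2 into svalues[2..4]. */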
320   svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
321   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
322   starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
323   starts[0]  = 0;
324   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
325   for ( i=0; i<N; i++ ) {
326     svalues[starts[owner[i]]++] = rows[i];
327   }
328   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
329 
330   starts[0] = 0;
331   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
332   count = 0;
333   for ( i=0; i<size; i++ ) {
334     if (procs[i]) {
335       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
336     }
337   }
338   ierr = PetscFree(starts);CHKERRQ(ierr);
339 
340   base = owners[rank];
341 
342   /*  wait on receives */
343   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
344   source = lens + nrecvs;
345   count  = nrecvs; slen = 0;
346   while (count) {
347     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
348     /* unpack receives into our local space */
349     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
350     source[imdex]  = recv_status.MPI_SOURCE;
351     lens[imdex]  = n;
352     slen += n;
353     count--;
354   }
355   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
356 
357   /* gather the received row indices into a single local list */
358   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
359   count = 0;
360   for ( i=0; i<nrecvs; i++ ) {
361     values = rvalues + i*nmax;
362     for ( j=0; j<lens[i]; j++ ) {
363       lrows[count++] = values[j] - base;
364     }
365   }
366   ierr = PetscFree(rvalues);CHKERRQ(ierr);
367   ierr = PetscFree(lens);CHKERRQ(ierr);
368   ierr = PetscFree(owner);CHKERRQ(ierr);
369   ierr = PetscFree(nprocs);CHKERRQ(ierr);
370 
371   /* actually zap the local rows */
372   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
373   PLogObjectParent(A,istmp);
374   ierr = PetscFree(lrows);CHKERRQ(ierr);
375   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
376   ierr = ISDestroy(istmp);CHKERRQ(ierr);
377 
378   /* wait on sends */
379   if (nsends) {
380     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
381     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
382     ierr = PetscFree(send_status);CHKERRQ(ierr);
383   }
384   ierr = PetscFree(send_waits);CHKERRQ(ierr);
385   ierr = PetscFree(svalues);CHKERRQ(ierr);
386 
387   PetscFunctionReturn(0);
388 }
389 
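/*
   Matrix-vector products: each processor stores all N columns of its local rows,
   so the input vector is first gathered into the local work vector lvec with the
   scatter context Mvctx, and the product is then a purely local dense multiply
   with the m x N matrix stored in A (see MatMult_SeqDense).  The transpose
   products below work the other way around: a local transpose multiply followed
   by a reverse scatter that adds the partial results into the output vector.
*/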
390 #undef __FUNC__
391 #define __FUNC__ "MatMult_MPIDense"
392 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
393 {
394   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
395   int          ierr;
396 
397   PetscFunctionBegin;
398   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
399   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
400   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
401   PetscFunctionReturn(0);
402 }
403 
404 #undef __FUNC__
405 #define __FUNC__ "MatMultAdd_MPIDense"
406 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
407 {
408   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
409   int          ierr;
410 
411   PetscFunctionBegin;
412   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
413   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
414   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
415   PetscFunctionReturn(0);
416 }
417 
418 #undef __FUNC__
419 #define __FUNC__ "MatMultTrans_MPIDense"
420 int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
421 {
422   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
423   int          ierr;
424   Scalar       zero = 0.0;
425 
426   PetscFunctionBegin;
427   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
428   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
429   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
430   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
431   PetscFunctionReturn(0);
432 }
433 
434 #undef __FUNC__
435 #define __FUNC__ "MatMultTransAdd_MPIDense"
436 int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
437 {
438   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
439   int          ierr;
440 
441   PetscFunctionBegin;
442   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
443   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
444   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
445   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
446   PetscFunctionReturn(0);
447 }
448 
449 #undef __FUNC__
450 #define __FUNC__ "MatGetDiagonal_MPIDense"
451 int MatGetDiagonal_MPIDense(Mat A,Vec v)
452 {
453   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
454   Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
455   int          ierr, len, i, n, m = a->m, radd;
456   Scalar       *x, zero = 0.0;
457 
458   PetscFunctionBegin;
459   ierr = VecSet(&zero,v);CHKERRQ(ierr);
460   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
461   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
462   if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
463   len = PetscMin(aloc->m,aloc->n);
464   radd = a->rstart*m;
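  /* the local array aloc->v is stored by columns, so the diagonal entry of global
     row rstart+i (local row i, global column rstart+i) sits at v[rstart*m + i*m + i] */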
465   for ( i=0; i<len; i++ ) {
466     x[i] = aloc->v[radd + i*m + i];
467   }
468   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
469   PetscFunctionReturn(0);
470 }
471 
472 #undef __FUNC__
473 #define __FUNC__ "MatDestroy_MPIDense"
474 int MatDestroy_MPIDense(Mat mat)
475 {
476   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
477   int          ierr;
478 
479   PetscFunctionBegin;
480 
481   if (mat->mapping) {
482     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
483   }
484   if (mat->bmapping) {
485     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
486   }
487 #if defined(PETSC_USE_LOG)
488   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
489 #endif
490   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
491   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
492   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
493   if (mdn->lvec)   VecDestroy(mdn->lvec);
494   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
495   if (mdn->factor) {
496     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
497     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
498     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
499     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
500   }
501   ierr = PetscFree(mdn);CHKERRQ(ierr);
502   if (mat->rmap) {
503     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
504   }
505   if (mat->cmap) {
506     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
507   }
508   PLogObjectDestroy(mat);
509   PetscHeaderDestroy(mat);
510   PetscFunctionReturn(0);
511 }
512 
513 #undef __FUNC__
514 #define __FUNC__ "MatView_MPIDense_Binary"
515 static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
516 {
517   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
518   int          ierr;
519 
520   PetscFunctionBegin;
521   if (mdn->size == 1) {
522     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
523   }
524   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
525   PetscFunctionReturn(0);
526 }
527 
528 #undef __FUNC__
529 #define __FUNC__ "MatView_MPIDense_ASCIIorDraworSocket"
530 static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,Viewer viewer)
531 {
532   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
533   int          ierr, format, size = mdn->size, rank = mdn->rank;
534   ViewerType   vtype;
535   PetscTruth   isascii,isdraw;
536 
537   PetscFunctionBegin;
538   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
539   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
540   if (isascii) {
541     ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
542     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
543     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
544       MatInfo info;
545       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
546       ierr = ViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
547                    (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
548       ierr = ViewerFlush(viewer);CHKERRQ(ierr);
549       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
550       PetscFunctionReturn(0);
551     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
552       PetscFunctionReturn(0);
553     }
554   } else if (isdraw) {
555     Draw       draw;
556     PetscTruth isnull;
557 
558     ierr = ViewerDrawGetDraw(viewer, 0, &draw);CHKERRQ(ierr);
559     ierr = DrawIsNull(draw, &isnull);CHKERRQ(ierr);
560     if (isnull) PetscFunctionReturn(0);
561   }
562 
563   if (size == 1) {
564     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
565   } else {
566     /* assemble the entire matrix onto first processor. */
567     Mat          A;
568     int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
569     Scalar       *vals;
570     Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;
571 
572     if (!rank) {
573       ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
574     } else {
575       ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
576     }
577     PLogObjectParent(mat,A);
578 
579     /* Copy the matrix ... This isn't the most efficient means,
580        but it's quick for now */
581     row = mdn->rstart; m = Amdn->m;
582     for ( i=0; i<m; i++ ) {
583       ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
584       ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
585       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
586       row++;
587     }
588 
589     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
590     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
591     if (!rank) {
592       Viewer sviewer;
593       ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
594       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
595       ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
596     }
597     ierr = ViewerFlush(viewer);CHKERRQ(ierr);
598     ierr = MatDestroy(A);CHKERRQ(ierr);
599   }
600   PetscFunctionReturn(0);
601 }
602 
603 #undef __FUNC__
604 #define __FUNC__ "MatView_MPIDense"
605 int MatView_MPIDense(Mat mat,Viewer viewer)
606 {
607   int        ierr;
608   PetscTruth isascii,isbinary,isdraw,issocket;
609 
610   PetscFunctionBegin;
611 
612   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
613   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
614   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
615   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
616 
617   if (isascii || issocket || isdraw) {
618     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
619   } else if (isbinary) {
620     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
621   } else {
622     SETERRQ1(1,1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
623   }
624   PetscFunctionReturn(0);
625 }
626 
627 #undef __FUNC__
628 #define __FUNC__ "MatGetInfo_MPIDense"
629 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
630 {
631   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
632   Mat          mdn = mat->A;
633   int          ierr;
634   double       isend[5], irecv[5];
635 
636   PetscFunctionBegin;
637   info->rows_global    = (double)mat->M;
638   info->columns_global = (double)mat->N;
639   info->rows_local     = (double)mat->m;
640   info->columns_local  = (double)mat->N;
641   info->block_size     = 1.0;
642   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
643   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
644   isend[3] = info->memory;  isend[4] = info->mallocs;
645   if (flag == MAT_LOCAL) {
646     info->nz_used      = isend[0];
647     info->nz_allocated = isend[1];
648     info->nz_unneeded  = isend[2];
649     info->memory       = isend[3];
650     info->mallocs      = isend[4];
651   } else if (flag == MAT_GLOBAL_MAX) {
652     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
653     info->nz_used      = irecv[0];
654     info->nz_allocated = irecv[1];
655     info->nz_unneeded  = irecv[2];
656     info->memory       = irecv[3];
657     info->mallocs      = irecv[4];
658   } else if (flag == MAT_GLOBAL_SUM) {
659     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
660     info->nz_used      = irecv[0];
661     info->nz_allocated = irecv[1];
662     info->nz_unneeded  = irecv[2];
663     info->memory       = irecv[3];
664     info->mallocs      = irecv[4];
665   }
666   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
667   info->fill_ratio_needed = 0;
668   info->factor_mallocs    = 0;
669   PetscFunctionReturn(0);
670 }
671 
672 /* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
673    extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
674    extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
675    extern int MatSolve_MPIDense(Mat,Vec,Vec);
676    extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
677    extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
678    extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */
679 
680 #undef __FUNC__
681 #define __FUNC__ "MatSetOption_MPIDense"
682 int MatSetOption_MPIDense(Mat A,MatOption op)
683 {
684   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
685 
686   PetscFunctionBegin;
687   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
688       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
689       op == MAT_NEW_NONZERO_LOCATION_ERR ||
690       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
691       op == MAT_COLUMNS_SORTED ||
692       op == MAT_COLUMNS_UNSORTED) {
693         MatSetOption(a->A,op);
694   } else if (op == MAT_ROW_ORIENTED) {
695         a->roworiented = 1;
696         MatSetOption(a->A,op);
697   } else if (op == MAT_ROWS_SORTED ||
698              op == MAT_ROWS_UNSORTED ||
699              op == MAT_SYMMETRIC ||
700              op == MAT_STRUCTURALLY_SYMMETRIC ||
701              op == MAT_YES_NEW_DIAGONALS ||
702              op == MAT_USE_HASH_TABLE) {
703     PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
704   } else if (op == MAT_COLUMN_ORIENTED) {
705     a->roworiented = 0; MatSetOption(a->A,op);
706   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
707     a->donotstash = 1;
708   } else if (op == MAT_NO_NEW_DIAGONALS) {
709     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
710   } else {
711     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
712   }
713   PetscFunctionReturn(0);
714 }
715 
716 #undef __FUNC__
717 #define __FUNC__ "MatGetSize_MPIDense"
718 int MatGetSize_MPIDense(Mat A,int *m,int *n)
719 {
720   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
721 
722   PetscFunctionBegin;
723   if (m) *m = mat->M;
724   if (n) *n = mat->N;
725   PetscFunctionReturn(0);
726 }
727 
728 #undef __FUNC__
729 #define __FUNC__ "MatGetLocalSize_MPIDense"
730 int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
731 {
732   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
733 
734   PetscFunctionBegin;
735   *m = mat->m; *n = mat->N;
736   PetscFunctionReturn(0);
737 }
738 
739 #undef __FUNC__
740 #define __FUNC__ "MatGetOwnershipRange_MPIDense"
741 int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
742 {
743   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
744 
745   PetscFunctionBegin;
746   *m = mat->rstart; *n = mat->rend;
747   PetscFunctionReturn(0);
748 }
749 
750 #undef __FUNC__
751 #define __FUNC__ "MatGetRow_MPIDense"
752 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
753 {
754   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
755   int          lrow, rstart = mat->rstart, rend = mat->rend,ierr;
756 
757   PetscFunctionBegin;
758   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows");
759   lrow = row - rstart;
760   ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
761   PetscFunctionReturn(0);
762 }
763 
764 #undef __FUNC__
765 #define __FUNC__ "MatRestoreRow_MPIDense"
766 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
767 {
768   int ierr;
769 
770   PetscFunctionBegin;
771   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
772   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
773   PetscFunctionReturn(0);
774 }
775 
776 #undef __FUNC__
777 #define __FUNC__ "MatDiagonalScale_MPIDense"
778 int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
779 {
780   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
781   Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
782   Scalar       *l,*r,x,*v;
783   int          ierr,i,j,s2a,s3a,s2,s3,m=mat->m,n=mat->n;
784 
785   PetscFunctionBegin;
786   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
787   if (ll) {
788     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
789     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Left scaling vector non-conforming local size");
790     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
791     for ( i=0; i<m; i++ ) {
792       x = l[i];
793       v = mat->v + i;
794       for ( j=0; j<n; j++ ) { (*v) *= x; v+= m;}
795     }
796     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
797     PLogFlops(n*m);
798   }
799   if (rr) {
800     ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
801     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Right scaling vec non-conforming local size");
802     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
803     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
804     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
805     for ( i=0; i<n; i++ ) {
806       x = r[i];
807       v = mat->v + i*m;
808       for ( j=0; j<m; j++ ) { (*v++) *= x;}
809     }
810     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
811     PLogFlops(n*m);
812   }
813   PetscFunctionReturn(0);
814 }
815 
816 #undef __FUNC__
817 #define __FUNC__ "MatNorm_MPIDense"
818 int MatNorm_MPIDense(Mat A,NormType type,double *norm)
819 {
820   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
821   Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
822   int          ierr, i, j;
823   double       sum = 0.0;
824   Scalar       *v = mat->v;
825 
826   PetscFunctionBegin;
827   if (mdn->size == 1) {
828     ierr =  MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
829   } else {
830     if (type == NORM_FROBENIUS) {
831       for (i=0; i<mat->n*mat->m; i++ ) {
832 #if defined(PETSC_USE_COMPLEX)
833         sum += PetscReal(PetscConj(*v)*(*v)); v++;
834 #else
835         sum += (*v)*(*v); v++;
836 #endif
837       }
838       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
839       *norm = sqrt(*norm);
840       PLogFlops(2*mat->n*mat->m);
841     } else if (type == NORM_1) {
842       double *tmp, *tmp2;
843       tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) );CHKPTRQ(tmp);
844       tmp2 = tmp + mdn->N;
845       ierr = PetscMemzero(tmp,2*mdn->N*sizeof(double));CHKERRQ(ierr);
846       *norm = 0.0;
847       v = mat->v;
848       for ( j=0; j<mat->n; j++ ) {
849         for ( i=0; i<mat->m; i++ ) {
850           tmp[j] += PetscAbsScalar(*v);  v++;
851         }
852       }
853       ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
854       for ( j=0; j<mdn->N; j++ ) {
855         if (tmp2[j] > *norm) *norm = tmp2[j];
856       }
857       ierr = PetscFree(tmp);CHKERRQ(ierr);
858       PLogFlops(mat->n*mat->m);
859     } else if (type == NORM_INFINITY) { /* max row norm */
860       double ntemp;
861       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
862       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
863     } else {
864       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
865     }
866   }
867   PetscFunctionReturn(0);
868 }
869 
870 #undef __FUNC__
871 #define __FUNC__ "MatTranspose_MPIDense"
872 int MatTranspose_MPIDense(Mat A,Mat *matout)
873 {
874   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
875   Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
876   Mat          B;
877   int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
878   int          j, i, ierr;
879   Scalar       *v;
880 
881   PetscFunctionBegin;
882   if (matout == PETSC_NULL && M != N) {
883     SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place");
884   }
885   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);
886 
887   m = Aloc->m; n = Aloc->n; v = Aloc->v;
888   rwork = (int *) PetscMalloc((m+1)*sizeof(int));CHKPTRQ(rwork);
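  /* column j of the local array holds entries (rstart+i, j) of A for i = 0..m-1;
     these become entries (j, rstart+i) of the transpose, i.e. row j, columns
     rstart..rstart+m-1 of B */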
889   for ( j=0; j<n; j++ ) {
890     for (i=0; i<m; i++) rwork[i] = rstart + i;
891     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
892     v   += m;
893   }
894   ierr = PetscFree(rwork);CHKERRQ(ierr);
895   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
896   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
897   if (matout != PETSC_NULL) {
898     *matout = B;
899   } else {
900     PetscOps *Abops;
901     MatOps   Aops;
902 
903     /* This isn't really an in-place transpose; we simply free the data structures held in a */
904     ierr = PetscFree(a->rowners);CHKERRQ(ierr);
905     ierr = MatDestroy(a->A);CHKERRQ(ierr);
906     if (a->lvec) VecDestroy(a->lvec);
907     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
908     ierr = PetscFree(a);CHKERRQ(ierr);
909 
910     /*
911          This is horrible, horrible code. We need to keep the
912       A pointers for the bops and ops but copy everything
913       else from B.
914     */
915     Abops   = A->bops;
916     Aops    = A->ops;
917     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
918     A->bops = Abops;
919     A->ops  = Aops;
920 
921     PetscHeaderDestroy(B);
922   }
923   PetscFunctionReturn(0);
924 }
925 
926 #include "pinclude/blaslapack.h"
927 #undef __FUNC__
928 #define __FUNC__ "MatScale_MPIDense"
929 int MatScale_MPIDense(Scalar *alpha,Mat inA)
930 {
931   Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
932   Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
933   int          one = 1, nz;
934 
935   PetscFunctionBegin;
936   nz = a->m*a->n;
937   BLscal_( &nz, alpha, a->v, &one );
938   PLogFlops(nz);
939   PetscFunctionReturn(0);
940 }
941 
942 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
943 extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);
944 
945 /* -------------------------------------------------------------------*/
946 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
947        MatGetRow_MPIDense,
948        MatRestoreRow_MPIDense,
949        MatMult_MPIDense,
950        MatMultAdd_MPIDense,
951        MatMultTrans_MPIDense,
952        MatMultTransAdd_MPIDense,
953        0,
954        0,
955        0,
956        0,
957        0,
958        0,
959        0,
960        MatTranspose_MPIDense,
961        MatGetInfo_MPIDense,0,
962        MatGetDiagonal_MPIDense,
963        MatDiagonalScale_MPIDense,
964        MatNorm_MPIDense,
965        MatAssemblyBegin_MPIDense,
966        MatAssemblyEnd_MPIDense,
967        0,
968        MatSetOption_MPIDense,
969        MatZeroEntries_MPIDense,
970        MatZeroRows_MPIDense,
971        0,
972        0,
973        0,
974        0,
975        MatGetSize_MPIDense,
976        MatGetLocalSize_MPIDense,
977        MatGetOwnershipRange_MPIDense,
978        0,
979        0,
980        MatGetArray_MPIDense,
981        MatRestoreArray_MPIDense,
982        MatDuplicate_MPIDense,
983        0,
984        0,
985        0,
986        0,
987        0,
988        MatGetSubMatrices_MPIDense,
989        0,
990        MatGetValues_MPIDense,
991        0,
992        0,
993        MatScale_MPIDense,
994        0,
995        0,
996        0,
997        MatGetBlockSize_MPIDense,
998        0,
999        0,
1000        0,
1001        0,
1002        0,
1003        0,
1004        0,
1005        0,
1006        0,
1007        MatGetSubMatrix_MPIDense,
1008        0,
1009        0,
1010        MatGetMaps_Petsc};
1011 
1012 #undef __FUNC__
1013 #define __FUNC__ "MatCreateMPIDense"
1014 /*@C
1015    MatCreateMPIDense - Creates a parallel matrix in dense format.
1016 
1017    Collective on MPI_Comm
1018 
1019    Input Parameters:
1020 +  comm - MPI communicator
1021 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1022 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1023 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1024 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1025 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1026    to control all matrix memory allocation.
1027 
1028    Output Parameter:
1029 .  A - the matrix
1030 
1031    Notes:
1032    The dense format is fully compatible with standard Fortran 77
1033    storage by columns.
1034 
1035    The data input variable is intended primarily for Fortran programmers
1036    who wish to allocate their own matrix memory space.  Most users should
1037    set data=PETSC_NULL.
1038 
1039    The user MUST specify either the local or global matrix dimensions
1040    (possibly both).
1041 
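   Example usage (a minimal sketch; the communicator, global sizes, and the single
   entry set here are illustrative only):

      Mat    A;
      int    row = 0, col = 0, ierr;
      Scalar value = 1.0;
      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,PETSC_NULL,&A);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,1,&col,&value,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatDestroy(A);CHKERRQ(ierr);
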
1042    Level: intermediate
1043 
1044 .keywords: matrix, dense, parallel
1045 
1046 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1047 @*/
1048 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
1049 {
1050   Mat          mat;
1051   Mat_MPIDense *a;
1052   int          ierr, i;
1053   PetscTruth   flg;
1054 
1055   PetscFunctionBegin;
1056   /* Note:  For now, when data is specified above, this assumes the user correctly
1057    allocates the local dense storage space.  We should add error checking. */
1058 
1059   *A = 0;
1060   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView);
1061   PLogObjectCreate(mat);
1062   mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
1063   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1064   mat->ops->destroy = MatDestroy_MPIDense;
1065   mat->ops->view    = MatView_MPIDense;
1066   mat->factor       = 0;
1067   mat->mapping      = 0;
1068 
1069   a->factor       = 0;
1070   mat->insertmode = NOT_SET_VALUES;
1071   ierr = MPI_Comm_rank(comm,&a->rank);CHKERRQ(ierr);
1072   ierr = MPI_Comm_size(comm,&a->size);CHKERRQ(ierr);
1073 
1074   ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
1075 
1076   ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1077   a->nvec = n;
1078 
1079   /* each processor stores all N columns of its local rows */
1080   a->N = mat->N = N;
1081   a->M = mat->M = M;
1082   a->m = mat->m = m;
1083   a->n = mat->n = N;   /* NOTE: n == N */
1084 
1085   /* the information in the maps duplicates the information computed below; eventually
1086      we should remove the duplicate information that is not contained in the maps */
1087   ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr);
1088   ierr = MapCreateMPI(comm,n,N,&mat->cmap);CHKERRQ(ierr);
1089 
1090   /* build local table of row and column ownerships */
1091   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
1092   a->cowners = a->rowners + a->size + 1;
1093   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1094   ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1095   a->rowners[0] = 0;
1096   for ( i=2; i<=a->size; i++ ) {
1097     a->rowners[i] += a->rowners[i-1];
1098   }
1099   a->rstart = a->rowners[a->rank];
1100   a->rend   = a->rowners[a->rank+1];
1101   ierr      = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1102   a->cowners[0] = 0;
1103   for ( i=2; i<=a->size; i++ ) {
1104     a->cowners[i] += a->cowners[i-1];
1105   }
1106 
1107   ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A);CHKERRQ(ierr);
1108   PLogObjectParent(mat,a->A);
1109 
1110   /* build cache (stash) for entries destined for other processors */
1111   a->donotstash = 0;
1112   ierr = MatStashCreate_Private(comm,1,&mat->stash);CHKERRQ(ierr);
1113 
1114   /* stuff used for matrix vector multiply */
1115   a->lvec        = 0;
1116   a->Mvctx       = 0;
1117   a->roworiented = 1;
1118 
1119   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1120                                      "MatGetDiagonalBlock_MPIDense",
1121                                      (void*)MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1122 
1123   *A = mat;
1124   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
1125   if (flg) {
1126     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
1127   }
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 #undef __FUNC__
1132 #define __FUNC__ "MatDuplicate_MPIDense"
1133 static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1134 {
1135   Mat          mat;
1136   Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
1137   int          ierr;
1138   FactorCtx    *factor;
1139 
1140   PetscFunctionBegin;
1141   *newmat       = 0;
1142   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView);
1143   PLogObjectCreate(mat);
1144   mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
1145   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1146   mat->ops->destroy = MatDestroy_MPIDense;
1147   mat->ops->view    = MatView_MPIDense;
1148   mat->factor       = A->factor;
1149   mat->assembled    = PETSC_TRUE;
1150 
1151   a->m = mat->m = oldmat->m;
1152   a->n = mat->n = oldmat->n;
1153   a->M = mat->M = oldmat->M;
1154   a->N = mat->N = oldmat->N;
1155   if (oldmat->factor) {
1156     a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx));CHKPTRQ(factor);
1157     /* copy factor contents ... add this code! */
1158   } else a->factor = 0;
1159 
1160   a->rstart       = oldmat->rstart;
1161   a->rend         = oldmat->rend;
1162   a->size         = oldmat->size;
1163   a->rank         = oldmat->rank;
1164   mat->insertmode = NOT_SET_VALUES;
1165   a->donotstash   = oldmat->donotstash;
1166   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners);
1167   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1168   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
1169   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1170 
1171   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1172   PLogObjectParent(mat,a->lvec);
1173   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1174   PLogObjectParent(mat,a->Mvctx);
1175   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1176   PLogObjectParent(mat,a->A);
1177   *newmat = mat;
1178   PetscFunctionReturn(0);
1179 }
1180 
1181 #include "sys.h"
1182 
1183 #undef __FUNC__
1184 #define __FUNC__ "MatLoad_MPIDense_DenseInFile"
1185 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
1186 {
1187   int        *rowners, i,size,rank,m,ierr,nz,j;
1188   Scalar     *array,*vals,*vals_ptr;
1189   MPI_Status status;
1190 
1191   PetscFunctionBegin;
1192   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1193   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1194 
1195   /* determine ownership of all rows */
1196   m          = M/size + ((M % size) > rank);
1197   rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1198   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1199   rowners[0] = 0;
1200   for ( i=2; i<=size; i++ ) {
1201     rowners[i] += rowners[i-1];
1202   }
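  /* For example (illustrative numbers only): with M = 10 rows and size = 4
     processors the local sizes m are 3, 3, 2, 2 and rowners becomes
     {0, 3, 6, 8, 10}. */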
1203 
1204   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1205   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1206 
1207   if (!rank) {
1208     vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);
1209 
1210     /* read in my part of the matrix numerical values  */
1211     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1212 
1213     /* insert into the matrix row by row (the file stores the values by rows while the array is stored by columns, which is why we cannot read directly into the array) */
1214     vals_ptr = vals;
1215     for ( i=0; i<m; i++ ) {
1216       for ( j=0; j<N; j++ ) {
1217         array[i + j*m] = *vals_ptr++;
1218       }
1219     }
1220 
1221     /* read in the other processors' parts and ship them out */
1222     for ( i=1; i<size; i++ ) {
1223       nz   = (rowners[i+1] - rowners[i])*N;
1224       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1225       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1226     }
1227   } else {
1228     /* receive numeric values */
1229     vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);
1230 
1231     /* receive message of values*/
1232     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1233 
1234     /* insert into the matrix row by row (the values arrive by rows while the array is stored by columns, which is why we cannot receive directly into the array) */
1235     vals_ptr = vals;
1236     for ( i=0; i<m; i++ ) {
1237       for ( j=0; j<N; j++ ) {
1238         array[i + j*m] = *vals_ptr++;
1239       }
1240     }
1241   }
1242   ierr = PetscFree(rowners);CHKERRQ(ierr);
1243   ierr = PetscFree(vals);CHKERRQ(ierr);
1244   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1245   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 
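/*
   MatLoad_MPIDense reads a matrix from a PETSc binary viewer.  The file starts
   with a header of four integers: the matrix cookie, the global sizes M and N,
   and the number of nonzeros nz.  If nz equals MATRIX_BINARY_FORMAT_DENSE the
   values are stored densely by rows and MatLoad_MPIDense_DenseInFile() above is
   used; otherwise the file continues with the row lengths, the column indices,
   and the numerical values, which processor 0 reads and ships to the owning
   processors.
*/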
1250 #undef __FUNC__
1251 #define __FUNC__ "MatLoad_MPIDense"
1252 int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
1253 {
1254   Mat          A;
1255   Scalar       *vals,*svals;
1256   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1257   MPI_Status   status;
1258   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1259   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1260   int          tag = ((PetscObject)viewer)->tag;
1261   int          i, nz, ierr, j,rstart, rend, fd;
1262 
1263   PetscFunctionBegin;
1264   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1265   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1266   if (!rank) {
1267     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1268     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1269     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1270   }
1271 
1272   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1273   M = header[1]; N = header[2]; nz = header[3];
1274 
1275   /*
1276        Handle case where matrix is stored on disk as a dense matrix
1277   */
1278   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1279     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1280     PetscFunctionReturn(0);
1281   }
1282 
1283   /* determine ownership of all rows */
1284   m          = M/size + ((M % size) > rank);
1285   rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1286   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1287   rowners[0] = 0;
1288   for ( i=2; i<=size; i++ ) {
1289     rowners[i] += rowners[i-1];
1290   }
1291   rstart = rowners[rank];
1292   rend   = rowners[rank+1];
1293 
1294   /* distribute row lengths to all processors */
1295   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
1296   offlens = ourlens + (rend-rstart);
1297   if (!rank) {
1298     rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
1299     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1300     sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
1301     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1302     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1303     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1304   } else {
1305     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
1306   }
1307 
1308   if (!rank) {
1309     /* calculate the number of nonzeros on each processor */
1310     procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
1311     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1312     for ( i=0; i<size; i++ ) {
1313       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1314         procsnz[i] += rowlengths[j];
1315       }
1316     }
1317     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1318 
1319     /* determine max buffer needed and allocate it */
1320     maxnz = 0;
1321     for ( i=0; i<size; i++ ) {
1322       maxnz = PetscMax(maxnz,procsnz[i]);
1323     }
1324     cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
1325 
1326     /* read in my part of the matrix column indices  */
1327     nz = procsnz[0];
1328     mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
1329     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1330 
1331     /* read in everyone else's parts and ship them off */
1332     for ( i=1; i<size; i++ ) {
1333       nz   = procsnz[i];
1334       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1335       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1336     }
1337     ierr = PetscFree(cols);CHKERRQ(ierr);
1338   } else {
1339     /* determine buffer space needed for message */
1340     nz = 0;
1341     for ( i=0; i<m; i++ ) {
1342       nz += ourlens[i];
1343     }
1344     mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
1345 
1346     /* receive message of column indices*/
1347     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1348     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1349     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1350   }
1351 
1352   /* loop over local rows, determining number of off diagonal entries */
1353   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1354   jj = 0;
1355   for ( i=0; i<m; i++ ) {
1356     for ( j=0; j<ourlens[i]; j++ ) {
1357       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1358       jj++;
1359     }
1360   }
1361 
1362   /* create our matrix */
1363   for ( i=0; i<m; i++ ) {
1364     ourlens[i] -= offlens[i];
1365   }
1366   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1367   A = *newmat;
1368   for ( i=0; i<m; i++ ) {
1369     ourlens[i] += offlens[i];
1370   }
1371 
1372   if (!rank) {
1373     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);
1374 
1375     /* read in my part of the matrix numerical values  */
1376     nz = procsnz[0];
1377     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1378 
1379     /* insert into matrix */
1380     jj      = rstart;
1381     smycols = mycols;
1382     svals   = vals;
1383     for ( i=0; i<m; i++ ) {
1384       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1385       smycols += ourlens[i];
1386       svals   += ourlens[i];
1387       jj++;
1388     }
1389 
1390     /* read in the other processors' parts and ship them out */
1391     for ( i=1; i<size; i++ ) {
1392       nz   = procsnz[i];
1393       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1394       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1395     }
1396     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1397   } else {
1398     /* receive numeric values */
1399     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);
1400 
1401     /* receive message of values*/
1402     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1403     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1404     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1405 
1406     /* insert into matrix */
1407     jj      = rstart;
1408     smycols = mycols;
1409     svals   = vals;
1410     for ( i=0; i<m; i++ ) {
1411       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1412       smycols += ourlens[i];
1413       svals   += ourlens[i];
1414       jj++;
1415     }
1416   }
1417   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1418   ierr = PetscFree(vals);CHKERRQ(ierr);
1419   ierr = PetscFree(mycols);CHKERRQ(ierr);
1420   ierr = PetscFree(rowners);CHKERRQ(ierr);
1421 
1422   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1423   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1424   PetscFunctionReturn(0);
1425 }
1426 
1427 
1428 
1429 
1430 
1431