xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision b9b9770376f6bdad54cc53bb7a8b79bbb53b7e80)
1 /*$Id: mpidense.c,v 1.140 2000/05/10 16:40:34 bsmith Exp bsmith $*/
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 #include "src/mat/impls/dense/mpi/mpidense.h"
8 #include "src/vec/vecimpl.h"
9 
10 EXTERN_C_BEGIN
11 #undef __FUNC__
12 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPIDense"
13 int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
14 {
15   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
16   int          m = mdn->m,rstart = mdn->rstart,rank,ierr;
17   Scalar       *array;
18   MPI_Comm     comm;
19 
20   PetscFunctionBegin;
21   if (mdn->M != mdn->N) SETERRQ(PETSC_ERR_SUP,0,"Only square matrices supported.");
22 
23   /* The reuse aspect is not implemented efficiently */
24   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
25 
26   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
27   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
28   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
29   ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
30   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
31   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
32   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
33 
34   *iscopy = PETSC_TRUE;
35   PetscFunctionReturn(0);
36 }
37 EXTERN_C_END
38 
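/*
   A minimal usage sketch (guarded by #if 0): retrieving the local diagonal
   block through the "MatGetDiagonalBlock_C" method that MatCreateMPIDense()
   composes onto the matrix (see below).  The MatGetDiagonalBlock() interface
   routine and the name DiagonalBlockExample are assumptions made for
   illustration; the calling sequence mirrors the implementation above.
*/
#if 0
static int DiagonalBlockExample(Mat A)
{
  int        ierr;
  PetscTruth iscopy;
  Mat        Adiag;

  ierr = MatGetDiagonalBlock(A,&iscopy,MAT_INITIAL_MATRIX,&Adiag);CHKERRQ(ierr);
  /* Adiag is this processor's square diagonal block as a sequential dense matrix */
  if (iscopy) {ierr = MatDestroy(Adiag);CHKERRQ(ierr);}
  return 0;
}
#endif
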
39 EXTERN int MatSetUpMultiply_MPIDense(Mat);
40 
41 #undef __FUNC__
42 #define __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIDense"
43 int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
44 {
45   Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
46   int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
47   int          roworiented = A->roworiented;
48 
49   PetscFunctionBegin;
50   for (i=0; i<m; i++) {
51     if (idxm[i] < 0) continue;
52     if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
53     if (idxm[i] >= rstart && idxm[i] < rend) {
54       row = idxm[i] - rstart;
55       if (roworiented) {
56         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
57       } else {
58         for (j=0; j<n; j++) {
59           if (idxn[j] < 0) continue;
60           if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
61           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
62         }
63       }
64     } else {
65       if (!A->donotstash) {
66         if (roworiented) {
67           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
68         } else {
69           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
70         }
71       }
72     }
73   }
74   PetscFunctionReturn(0);
75 }
76 
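/*
   A minimal usage sketch (guarded by #if 0) of the two value orderings that
   MatSetValues_MPIDense() accepts: row-oriented (the default) and
   column-oriented after MatSetOption(mat,MAT_COLUMN_ORIENTED).  The function
   name InsertBlockExample and the 2x2 block are assumptions for illustration.
*/
#if 0
static int InsertBlockExample(Mat A)
{
  int    ierr,rows[2] = {0,1},cols[2] = {0,1};
  Scalar rowvals[4] = {1.0,2.0,3.0,4.0};   /* row-oriented:    a00,a01,a10,a11 */
  Scalar colvals[4] = {1.0,3.0,2.0,4.0};   /* column-oriented: a00,a10,a01,a11 */

  /* default: v is interpreted row by row */
  ierr = MatSetValues(A,2,rows,2,cols,rowvals,INSERT_VALUES);CHKERRQ(ierr);

  /* same numerical block, supplied column by column */
  ierr = MatSetOption(A,MAT_COLUMN_ORIENTED);CHKERRQ(ierr);
  ierr = MatSetValues(A,2,rows,2,cols,colvals,INSERT_VALUES);CHKERRQ(ierr);

  /* rows owned by other processors are stashed and shipped during assembly */
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  return 0;
}
#endif
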
77 #undef __FUNC__
78 #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPIDense"
79 int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
80 {
81   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
82   int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;
83 
84   PetscFunctionBegin;
85   for (i=0; i<m; i++) {
86     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
87     if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
88     if (idxm[i] >= rstart && idxm[i] < rend) {
89       row = idxm[i] - rstart;
90       for (j=0; j<n; j++) {
91         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
92         if (idxn[j] >= mdn->N) {
93           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
94         }
95         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
96       }
97     } else {
98       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
99     }
100   }
101   PetscFunctionReturn(0);
102 }
103 
104 #undef __FUNC__
105 #define __FUNC__ /*<a name=""></a>*/"MatGetArray_MPIDense"
106 int MatGetArray_MPIDense(Mat A,Scalar **array)
107 {
108   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
109   int          ierr;
110 
111   PetscFunctionBegin;
112   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
113   PetscFunctionReturn(0);
114 }
115 
116 #undef __FUNC__
117 #define __FUNC__ /*<a name=""></a>*/"MatGetSubMatrix_MPIDense"
118 static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
119 {
120   Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
121   Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
122   int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols,rank;
123   Scalar       *av,*bv,*v = lmat->v;
124   Mat          newmat;
125 
126   PetscFunctionBegin;
127   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
128   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
129   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
130   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
131   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
132 
133 /* No parallel redistribution is currently supported! We should really check each index set
134      to confirm that it is OK.  ... Currently the submatrix must have the same row partitioning as
135      the original matrix! */
136 
137   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
138   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
139 
140   /* Check submatrix call */
141   if (scall == MAT_REUSE_MATRIX) {
142     /* SETERRQ(PETSC_ERR_ARG_SIZ,0,"Reused submatrix wrong size"); */
143     /* Really need to test the row and column sizes! */
144     newmat = *B;
145   } else {
146     /* Create and fill new matrix */
147     ierr = MatCreateMPIDense(A->comm,nrows,ncols,PETSC_DECIDE,PETSC_DECIDE,PETSC_NULL,&newmat);CHKERRQ(ierr);
148   }
149 
150   /* Now extract the data pointers and do the copy, column at a time */
151   newmatd = (Mat_MPIDense*)newmat->data;
152   bv = ((Mat_SeqDense *)newmatd->A->data)->v;
153 
154   for (i=0; i<ncols; i++) {
155     av = v + nlrows*icol[i];
156     for (j=0; j<nrows; j++) {
157       *bv++ = av[irow[j] - rstart];
158     }
159   }
160 
161   /* Assemble the matrices so that the correct flags are set */
162   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
163   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
164 
165   /* Free work space */
166   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
167   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
168   *B = newmat;
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNC__
173 #define __FUNC__ /*<a name=""></a>*/"MatRestoreArray_MPIDense"
174 int MatRestoreArray_MPIDense(Mat A,Scalar **array)
175 {
176   PetscFunctionBegin;
177   PetscFunctionReturn(0);
178 }
179 
180 #undef __FUNC__
181 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPIDense"
182 int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
183 {
184   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
185   MPI_Comm     comm = mat->comm;
186   int          ierr,nstash,reallocs;
187   InsertMode   addv;
188 
189   PetscFunctionBegin;
190   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
191   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
192   if (addv == (ADD_VALUES|INSERT_VALUES)) {
193     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
194   }
195   mat->insertmode = addv; /* in case this processor had no cache */
196 
197   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
198   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
199   PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
200   PetscFunctionReturn(0);
201 }
202 
203 #undef __FUNC__
204 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPIDense"
205 int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
206 {
207   Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
208   int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
209   Scalar       *val;
210   InsertMode   addv=mat->insertmode;
211 
212   PetscFunctionBegin;
213   /*  wait on receives */
214   while (1) {
215     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
216     if (!flg) break;
217 
218     for (i=0; i<n;) {
219       /* Now identify the consecutive vals belonging to the same row */
220       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
221       if (j < n) ncols = j-i;
222       else       ncols = n-i;
223       /* Now assemble all these values with a single function call */
224       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
225       i = j;
226     }
227   }
228   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
229 
230   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
231   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
232 
233   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
234     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
235   }
236   PetscFunctionReturn(0);
237 }
238 
239 #undef __FUNC__
240 #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPIDense"
241 int MatZeroEntries_MPIDense(Mat A)
242 {
243   int          ierr;
244   Mat_MPIDense *l = (Mat_MPIDense*)A->data;
245 
246   PetscFunctionBegin;
247   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
248   PetscFunctionReturn(0);
249 }
250 
251 #undef __FUNC__
252 #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPIDense"
253 int MatGetBlockSize_MPIDense(Mat A,int *bs)
254 {
255   PetscFunctionBegin;
256   *bs = 1;
257   PetscFunctionReturn(0);
258 }
259 
260 /* the code does not handle the diagonal entries correctly unless the
261    matrix is square and the column and row ownerships are identical.
262    This is a BUG. The only way to fix it seems to be to access
263    mdn->A and mdn->B directly and not through the MatZeroRows()
264    routine.
265 */
266 #undef __FUNC__
267 #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPIDense"
268 int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
269 {
270   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
271   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
272   int            *procs,*nprocs,j,found,idx,nsends,*work;
273   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
274   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
275   int            *lens,imdex,*lrows,*values;
276   MPI_Comm       comm = A->comm;
277   MPI_Request    *send_waits,*recv_waits;
278   MPI_Status     recv_status,*send_status;
279   IS             istmp;
280 
281   PetscFunctionBegin;
282   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
283   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
284 
285   /*  first count number of contributors to each processor */
286   nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
287   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
288   procs  = nprocs + size;
289   owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
290   for (i=0; i<N; i++) {
291     idx = rows[i];
292     found = 0;
293     for (j=0; j<size; j++) {
294       if (idx >= owners[j] && idx < owners[j+1]) {
295         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
296       }
297     }
298     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
299   }
300   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
301 
302   /* inform other processors of number of messages and max length*/
303   work   = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
304   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
305   nmax   = work[rank];
306   nrecvs = work[size+rank];
307   ierr   = PetscFree(work);CHKERRQ(ierr);
308 
309   /* post receives:   */
310   rvalues    = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
311   recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
312   for (i=0; i<nrecvs; i++) {
313     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
314   }
315 
316   /* do sends:
317       1) starts[i] gives the starting index in svalues for stuff going to
318          the ith processor
319   */
320   svalues    = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
321   send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
322   starts     = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
323   starts[0]  = 0;
324   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
325   for (i=0; i<N; i++) {
326     svalues[starts[owner[i]]++] = rows[i];
327   }
328   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
329 
330   starts[0] = 0;
331   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
332   count = 0;
333   for (i=0; i<size; i++) {
334     if (procs[i]) {
335       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
336     }
337   }
338   ierr = PetscFree(starts);CHKERRQ(ierr);
339 
340   base = owners[rank];
341 
342   /*  wait on receives */
343   lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
344   source = lens + nrecvs;
345   count  = nrecvs; slen = 0;
346   while (count) {
347     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
348     /* unpack receives into our local space */
349     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
350     source[imdex]  = recv_status.MPI_SOURCE;
351     lens[imdex]  = n;
352     slen += n;
353     count--;
354   }
355   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
356 
357   /* gather the received global row numbers into a local list, shifted to local numbering */
358   lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
359   count = 0;
360   for (i=0; i<nrecvs; i++) {
361     values = rvalues + i*nmax;
362     for (j=0; j<lens[i]; j++) {
363       lrows[count++] = values[j] - base;
364     }
365   }
366   ierr = PetscFree(rvalues);CHKERRQ(ierr);
367   ierr = PetscFree(lens);CHKERRQ(ierr);
368   ierr = PetscFree(owner);CHKERRQ(ierr);
369   ierr = PetscFree(nprocs);CHKERRQ(ierr);
370 
371   /* actually zap the local rows */
372   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
373   PLogObjectParent(A,istmp);
374   ierr = PetscFree(lrows);CHKERRQ(ierr);
375   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
376   ierr = ISDestroy(istmp);CHKERRQ(ierr);
377 
378   /* wait on sends */
379   if (nsends) {
380     send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
381     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
382     ierr = PetscFree(send_status);CHKERRQ(ierr);
383   }
384   ierr = PetscFree(send_waits);CHKERRQ(ierr);
385   ierr = PetscFree(svalues);CHKERRQ(ierr);
386 
387   PetscFunctionReturn(0);
388 }
389 
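/*
   A minimal usage sketch (guarded by #if 0) of the calling sequence handled
   by MatZeroRows_MPIDense() above: each processor may list any global rows,
   which are routed to their owners.  The row numbers and the name
   ZeroRowsExample are assumptions for illustration.
*/
#if 0
static int ZeroRowsExample(Mat A)
{
  int    ierr,rows[2] = {0,5};   /* global row numbers; they need not be locally owned */
  Scalar diag = 1.0;
  IS     is;

  ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is);CHKERRQ(ierr);
  ierr = MatZeroRows(A,is,&diag);CHKERRQ(ierr);   /* pass PETSC_NULL instead of &diag to leave the diagonal zeroed */
  ierr = ISDestroy(is);CHKERRQ(ierr);
  return 0;
}
#endif
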
390 #undef __FUNC__
391 #define __FUNC__ /*<a name=""></a>*/"MatMult_MPIDense"
392 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
393 {
394   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
395   int          ierr;
396 
397   PetscFunctionBegin;
398   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
399   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
400   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
401   PetscFunctionReturn(0);
402 }
403 
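/*
   A minimal usage sketch (guarded by #if 0) of the vector layouts MatMult()
   expects with this format: the result vector shares the matrix row
   distribution, while the input vector is a distributed vector of global
   length N that the scatter above gathers into mdn->lvec on every processor
   (a PETSC_DECIDE split is the usual choice).  The name MatVecExample is an
   assumption for illustration.
*/
#if 0
static int MatVecExample(Mat A)
{
  int      ierr,m,n,M,N;
  Scalar   one = 1.0;
  Vec      x,y;
  MPI_Comm comm;

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);   /* note: n is the full column count N for MPIDense */
  ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
  ierr = VecCreateMPI(comm,PETSC_DECIDE,N,&x);CHKERRQ(ierr);
  ierr = VecCreateMPI(comm,m,M,&y);CHKERRQ(ierr);
  ierr = VecSet(&one,x);CHKERRQ(ierr);
  ierr = MatMult(A,x,y);CHKERRQ(ierr);             /* y = A*x */
  ierr = VecDestroy(x);CHKERRQ(ierr);
  ierr = VecDestroy(y);CHKERRQ(ierr);
  return 0;
}
#endif
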
404 #undef __FUNC__
405 #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPIDense"
406 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
407 {
408   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
409   int          ierr;
410 
411   PetscFunctionBegin;
412   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
413   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
414   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
415   PetscFunctionReturn(0);
416 }
417 
418 #undef __FUNC__
419 #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPIDense"
420 int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
421 {
422   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
423   int          ierr;
424   Scalar       zero = 0.0;
425 
426   PetscFunctionBegin;
427   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
428   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
429   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
430   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
431   PetscFunctionReturn(0);
432 }
433 
434 #undef __FUNC__
435 #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPIDense"
436 int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
437 {
438   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
439   int          ierr;
440 
441   PetscFunctionBegin;
442   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
443   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
444   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
445   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
446   PetscFunctionReturn(0);
447 }
448 
449 #undef __FUNC__
450 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPIDense"
451 int MatGetDiagonal_MPIDense(Mat A,Vec v)
452 {
453   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
454   Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
455   int          ierr,len,i,n,m = a->m,radd;
456   Scalar       *x,zero = 0.0;
457 
458   PetscFunctionBegin;
459   ierr = VecSet(&zero,v);CHKERRQ(ierr);
460   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
461   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
462   if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
463   len = PetscMin(aloc->m,aloc->n);
464   radd = a->rstart*m;
465   for (i=0; i<len; i++) {
466     x[i] = aloc->v[radd + i*m + i];
467   }
468   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
469   PetscFunctionReturn(0);
470 }
471 
472 #undef __FUNC__
473 #define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPIDense"
474 int MatDestroy_MPIDense(Mat mat)
475 {
476   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
477   int          ierr;
478 
479   PetscFunctionBegin;
480 
481   if (mat->mapping) {
482     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
483   }
484   if (mat->bmapping) {
485     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
486   }
487 #if defined(PETSC_USE_LOG)
488   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
489 #endif
490   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
491   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
492   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
493   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
494   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
495   if (mdn->factor) {
496     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
497     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
498     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
499     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
500   }
501   ierr = PetscFree(mdn);CHKERRQ(ierr);
502   if (mat->rmap) {
503     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
504   }
505   if (mat->cmap) {
506     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
507   }
508   PLogObjectDestroy(mat);
509   PetscHeaderDestroy(mat);
510   PetscFunctionReturn(0);
511 }
512 
513 #undef __FUNC__
514 #define __FUNC__ /*<a name=""></a>*/"MatView_MPIDense_Binary"
515 static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
516 {
517   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
518   int          ierr;
519 
520   PetscFunctionBegin;
521   if (mdn->size == 1) {
522     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
523   }
524   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
525   PetscFunctionReturn(0);
526 }
527 
528 #undef __FUNC__
529 #define __FUNC__ /*<a name=""></a>*/"MatView_MPIDense_ASCIIorDraworSocket"
530 static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,Viewer viewer)
531 {
532   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
533   int          ierr,format,size = mdn->size,rank = mdn->rank;
534   ViewerType   vtype;
535   PetscTruth   isascii,isdraw;
536   Viewer       sviewer;
537 
538   PetscFunctionBegin;
539   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
540   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
541   if (isascii) {
542     ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
543     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
544     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
545       MatInfo info;
546       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
547       ierr = ViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
548                    (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
549       ierr = ViewerFlush(viewer);CHKERRQ(ierr);
550       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
551       PetscFunctionReturn(0);
552     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
553       PetscFunctionReturn(0);
554     }
555   } else if (isdraw) {
556     Draw       draw;
557     PetscTruth isnull;
558 
559     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
560     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr);
561     if (isnull) PetscFunctionReturn(0);
562   }
563 
564   if (size == 1) {
565     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
566   } else {
567     /* assemble the entire matrix onto first processor. */
568     Mat          A;
569     int          M = mdn->M,N = mdn->N,m,row,i,nz,*cols;
570     Scalar       *vals;
571     Mat_SeqDense *Amdn = (Mat_SeqDense*)mdn->A->data;
572 
573     if (!rank) {
574       ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
575     } else {
576       ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
577     }
578     PLogObjectParent(mat,A);
579 
580     /* Copy the matrix ... This isn't the most efficient means,
581        but it's quick for now */
582     row = mdn->rstart; m = Amdn->m;
583     for (i=0; i<m; i++) {
584       ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
585       ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
586       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
587       row++;
588     }
589 
590     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
591     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
592     ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
593     if (!rank) {
594       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
595     }
596     ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
597     ierr = ViewerFlush(viewer);CHKERRQ(ierr);
598     ierr = MatDestroy(A);CHKERRQ(ierr);
599   }
600   PetscFunctionReturn(0);
601 }
602 
603 #undef __FUNC__
604 #define __FUNC__ /*<a name=""></a>*/"MatView_MPIDense"
605 int MatView_MPIDense(Mat mat,Viewer viewer)
606 {
607   int        ierr;
608   PetscTruth isascii,isbinary,isdraw,issocket;
609 
610   PetscFunctionBegin;
611 
612   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
613   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
614   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
615   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
616 
617   if (isascii || issocket || isdraw) {
618     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
619   } else if (isbinary) {
620     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
621   } else {
622     SETERRQ1(1,1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
623   }
624   PetscFunctionReturn(0);
625 }
626 
627 #undef __FUNC__
628 #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPIDense"
629 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
630 {
631   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
632   Mat          mdn = mat->A;
633   int          ierr;
634   PetscReal    isend[5],irecv[5];
635 
636   PetscFunctionBegin;
637   info->rows_global    = (double)mat->M;
638   info->columns_global = (double)mat->N;
639   info->rows_local     = (double)mat->m;
640   info->columns_local  = (double)mat->N;
641   info->block_size     = 1.0;
642   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
643   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
644   isend[3] = info->memory;  isend[4] = info->mallocs;
645   if (flag == MAT_LOCAL) {
646     info->nz_used      = isend[0];
647     info->nz_allocated = isend[1];
648     info->nz_unneeded  = isend[2];
649     info->memory       = isend[3];
650     info->mallocs      = isend[4];
651   } else if (flag == MAT_GLOBAL_MAX) {
652     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
653     info->nz_used      = irecv[0];
654     info->nz_allocated = irecv[1];
655     info->nz_unneeded  = irecv[2];
656     info->memory       = irecv[3];
657     info->mallocs      = irecv[4];
658   } else if (flag == MAT_GLOBAL_SUM) {
659     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
660     info->nz_used      = irecv[0];
661     info->nz_allocated = irecv[1];
662     info->nz_unneeded  = irecv[2];
663     info->memory       = irecv[3];
664     info->mallocs      = irecv[4];
665   }
666   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
667   info->fill_ratio_needed = 0;
668   info->factor_mallocs    = 0;
669   PetscFunctionReturn(0);
670 }
671 
672 #undef __FUNC__
673 #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPIDense"
674 int MatSetOption_MPIDense(Mat A,MatOption op)
675 {
676   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
677 
678   PetscFunctionBegin;
679   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
680       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
681       op == MAT_NEW_NONZERO_LOCATION_ERR ||
682       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
683       op == MAT_COLUMNS_SORTED ||
684       op == MAT_COLUMNS_UNSORTED) {
685         MatSetOption(a->A,op);
686   } else if (op == MAT_ROW_ORIENTED) {
687         a->roworiented = 1;
688         MatSetOption(a->A,op);
689   } else if (op == MAT_ROWS_SORTED ||
690              op == MAT_ROWS_UNSORTED ||
691              op == MAT_SYMMETRIC ||
692              op == MAT_STRUCTURALLY_SYMMETRIC ||
693              op == MAT_YES_NEW_DIAGONALS ||
694              op == MAT_USE_HASH_TABLE) {
695     PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
696   } else if (op == MAT_COLUMN_ORIENTED) {
697     a->roworiented = 0; MatSetOption(a->A,op);
698   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
699     a->donotstash = 1;
700   } else if (op == MAT_NO_NEW_DIAGONALS) {
701     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
702   } else {
703     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
704   }
705   PetscFunctionReturn(0);
706 }
707 
708 #undef __FUNC__
709 #define __FUNC__ /*<a name=""></a>*/"MatGetSize_MPIDense"
710 int MatGetSize_MPIDense(Mat A,int *m,int *n)
711 {
712   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
713 
714   PetscFunctionBegin;
715   if (m) *m = mat->M;
716   if (n) *n = mat->N;
717   PetscFunctionReturn(0);
718 }
719 
720 #undef __FUNC__
721 #define __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPIDense"
722 int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
723 {
724   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
725 
726   PetscFunctionBegin;
727   *m = mat->m; *n = mat->N;
728   PetscFunctionReturn(0);
729 }
730 
731 #undef __FUNC__
732 #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPIDense"
733 int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
734 {
735   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
736 
737   PetscFunctionBegin;
738   if (m) *m = mat->rstart;
739   if (n) *n = mat->rend;
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNC__
744 #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPIDense"
745 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
746 {
747   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
748   int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;
749 
750   PetscFunctionBegin;
751   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows");
752   lrow = row - rstart;
753   ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
754   PetscFunctionReturn(0);
755 }
756 
757 #undef __FUNC__
758 #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPIDense"
759 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
760 {
761   int ierr;
762 
763   PetscFunctionBegin;
764   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
765   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
766   PetscFunctionReturn(0);
767 }
768 
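/*
   A minimal usage sketch (guarded by #if 0) for MatGetRow()/MatRestoreRow()
   on this format: only locally owned rows may be requested, as enforced in
   MatGetRow_MPIDense() above, so callers loop over the ownership range.
   The name RowLoopExample is an assumption for illustration.
*/
#if 0
static int RowLoopExample(Mat A)
{
  int    ierr,row,rstart,rend,nz,*cols;
  Scalar *vals;

  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
    /* ... use the nz column indices and values of this (dense) row ... */
    ierr = MatRestoreRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
  }
  return 0;
}
#endif
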
769 #undef __FUNC__
770 #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPIDense"
771 int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
772 {
773   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
774   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
775   Scalar       *l,*r,x,*v;
776   int          ierr,i,j,s2a,s3a,s2,s3,m=mat->m,n=mat->n;
777 
778   PetscFunctionBegin;
779   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
780   if (ll) {
781     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
782     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Left scaling vector non-conforming local size");
783     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
784     for (i=0; i<m; i++) {
785       x = l[i];
786       v = mat->v + i;
787       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
788     }
789     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
790     PLogFlops(n*m);
791   }
792   if (rr) {
793     ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
794     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Right scaling vec non-conforming local size");
795     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
796     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
797     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
798     for (i=0; i<n; i++) {
799       x = r[i];
800       v = mat->v + i*m;
801       for (j=0; j<m; j++) { (*v++) *= x;}
802     }
803     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
804     PLogFlops(n*m);
805   }
806   PetscFunctionReturn(0);
807 }
808 
809 #undef __FUNC__
810 #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPIDense"
811 int MatNorm_MPIDense(Mat A,NormType type,PetscReal *norm)
812 {
813   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
814   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
815   int          ierr,i,j;
816   PetscReal    sum = 0.0;
817   Scalar       *v = mat->v;
818 
819   PetscFunctionBegin;
820   if (mdn->size == 1) {
821     ierr =  MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
822   } else {
823     if (type == NORM_FROBENIUS) {
824       for (i=0; i<mat->n*mat->m; i++) {
825 #if defined(PETSC_USE_COMPLEX)
826         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
827 #else
828         sum += (*v)*(*v); v++;
829 #endif
830       }
831       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
832       *norm = sqrt(*norm);
833       PLogFlops(2*mat->n*mat->m);
834     } else if (type == NORM_1) {
835       PetscReal *tmp,*tmp2;
836       tmp  = (PetscReal*)PetscMalloc(2*mdn->N*sizeof(PetscReal));CHKPTRQ(tmp);
837       tmp2 = tmp + mdn->N;
838       ierr = PetscMemzero(tmp,2*mdn->N*sizeof(PetscReal));CHKERRQ(ierr);
839       *norm = 0.0;
840       v = mat->v;
841       for (j=0; j<mat->n; j++) {
842         for (i=0; i<mat->m; i++) {
843           tmp[j] += PetscAbsScalar(*v);  v++;
844         }
845       }
846       ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
847       for (j=0; j<mdn->N; j++) {
848         if (tmp2[j] > *norm) *norm = tmp2[j];
849       }
850       ierr = PetscFree(tmp);CHKERRQ(ierr);
851       PLogFlops(mat->n*mat->m);
852     } else if (type == NORM_INFINITY) { /* max row norm */
853       PetscReal ntemp;
854       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
855       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
856     } else {
857       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
858     }
859   }
860   PetscFunctionReturn(0);
861 }
862 
863 #undef __FUNC__
864 #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPIDense"
865 int MatTranspose_MPIDense(Mat A,Mat *matout)
866 {
867   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
868   Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
869   Mat          B;
870   int          M = a->M,N = a->N,m,n,*rwork,rstart = a->rstart;
871   int          j,i,ierr;
872   Scalar       *v;
873 
874   PetscFunctionBegin;
875   if (!matout && M != N) {
876     SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place");
877   }
878   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);
879 
880   m = Aloc->m; n = Aloc->n; v = Aloc->v;
881   rwork = (int*)PetscMalloc((m+1)*sizeof(int));CHKPTRQ(rwork); /* holds the m global row indices inserted per column */
882   for (j=0; j<n; j++) {
883     for (i=0; i<m; i++) rwork[i] = rstart + i;
884     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
885     v   += m;
886   }
887   ierr = PetscFree(rwork);CHKERRQ(ierr);
888   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
889   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
890   if (matout) {
891     *matout = B;
892   } else {
893     PetscOps *Abops;
894     MatOps   Aops;
895 
896     /* This isn't really an in-place transpose; we just free the data structures of a */
897     ierr = PetscFree(a->rowners);CHKERRQ(ierr);
898     ierr = MatDestroy(a->A);CHKERRQ(ierr);
899     if (a->lvec) {ierr = VecDestroy(a->lvec);CHKERRQ(ierr);}
900     if (a->Mvctx) {ierr = VecScatterDestroy(a->Mvctx);CHKERRQ(ierr);}
901     ierr = PetscFree(a);CHKERRQ(ierr);
902 
903     /*
904          This is horrible, horrible code. We need to keep
905       A's pointers for the bops and ops but copy everything
906       else over from B.
907     */
908     Abops   = A->bops;
909     Aops    = A->ops;
910     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
911     A->bops = Abops;
912     A->ops  = Aops;
913 
914     PetscHeaderDestroy(B);
915   }
916   PetscFunctionReturn(0);
917 }
918 
919 #include "pinclude/blaslapack.h"
920 #undef __FUNC__
921 #define __FUNC__ /*<a name=""></a>*/"MatScale_MPIDense"
922 int MatScale_MPIDense(Scalar *alpha,Mat inA)
923 {
924   Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
925   Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
926   int          one = 1,nz;
927 
928   PetscFunctionBegin;
929   nz = a->m*a->n;
930   BLscal_(&nz,alpha,a->v,&one);
931   PLogFlops(nz);
932   PetscFunctionReturn(0);
933 }
934 
935 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
936 EXTERN int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);
937 
938 /* -------------------------------------------------------------------*/
939 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
940        MatGetRow_MPIDense,
941        MatRestoreRow_MPIDense,
942        MatMult_MPIDense,
943        MatMultAdd_MPIDense,
944        MatMultTranspose_MPIDense,
945        MatMultTransposeAdd_MPIDense,
946        0,
947        0,
948        0,
949        0,
950        0,
951        0,
952        0,
953        MatTranspose_MPIDense,
954        MatGetInfo_MPIDense,0,
955        MatGetDiagonal_MPIDense,
956        MatDiagonalScale_MPIDense,
957        MatNorm_MPIDense,
958        MatAssemblyBegin_MPIDense,
959        MatAssemblyEnd_MPIDense,
960        0,
961        MatSetOption_MPIDense,
962        MatZeroEntries_MPIDense,
963        MatZeroRows_MPIDense,
964        0,
965        0,
966        0,
967        0,
968        MatGetSize_MPIDense,
969        MatGetLocalSize_MPIDense,
970        MatGetOwnershipRange_MPIDense,
971        0,
972        0,
973        MatGetArray_MPIDense,
974        MatRestoreArray_MPIDense,
975        MatDuplicate_MPIDense,
976        0,
977        0,
978        0,
979        0,
980        0,
981        MatGetSubMatrices_MPIDense,
982        0,
983        MatGetValues_MPIDense,
984        0,
985        0,
986        MatScale_MPIDense,
987        0,
988        0,
989        0,
990        MatGetBlockSize_MPIDense,
991        0,
992        0,
993        0,
994        0,
995        0,
996        0,
997        0,
998        0,
999        0,
1000        MatGetSubMatrix_MPIDense,
1001        MatDestroy_MPIDense,
1002        MatView_MPIDense,
1003        MatGetMaps_Petsc};
1004 
1005 #undef __FUNC__
1006 #define __FUNC__ /*<a name=""></a>*/"MatCreateMPIDense"
1007 /*@C
1008    MatCreateMPIDense - Creates a parallel matrix in dense format.
1009 
1010    Collective on MPI_Comm
1011 
1012    Input Parameters:
1013 +  comm - MPI communicator
1014 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1015 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1016 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1017 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1018 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1019    to control all matrix memory allocation.
1020 
1021    Output Parameter:
1022 .  A - the matrix
1023 
1024    Notes:
1025    The dense format is fully compatible with standard Fortran 77
1026    storage by columns.
1027 
1028    The data input variable is intended primarily for Fortran programmers
1029    who wish to allocate their own matrix memory space.  Most users should
1030    set data=PETSC_NULL.
1031 
1032    The user MUST specify either the local or global matrix dimensions
1033    (possibly both).
1034 
1035    Level: intermediate
1036 
1037 .keywords: matrix,dense, parallel
1038 
1039 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1040 @*/
1041 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
1042 {
1043   Mat          mat;
1044   Mat_MPIDense *a;
1045   int          ierr,i;
1046   PetscTruth   flg;
1047 
1048   PetscFunctionBegin;
1049   /* Note:  For now, when data is specified above, this assumes the user correctly
1050    allocates the local dense storage space.  We should add error checking. */
1051 
1052   *A = 0;
1053   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView);
1054   PLogObjectCreate(mat);
1055   mat->data         = (void*)(a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
1056   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1057   mat->factor       = 0;
1058   mat->mapping      = 0;
1059 
1060   a->factor       = 0;
1061   mat->insertmode = NOT_SET_VALUES;
1062   ierr = MPI_Comm_rank(comm,&a->rank);CHKERRQ(ierr);
1063   ierr = MPI_Comm_size(comm,&a->size);CHKERRQ(ierr);
1064 
1065   ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
1066 
1067   ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1068   a->nvec = n;
1069 
1070   /* each processor stores all N columns of its local rows */
1071   a->N = mat->N = N;
1072   a->M = mat->M = M;
1073   a->m = mat->m = m;
1074   a->n = mat->n = N;   /* NOTE: n == N */
1075 
1076   /* the information in the maps duplicates the information computed below, eventually
1077      we should remove the duplicate information that is not contained in the maps */
1078   ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr);
1079   ierr = MapCreateMPI(comm,n,N,&mat->cmap);CHKERRQ(ierr);
1080 
1081   /* build local table of row and column ownerships */
1082   a->rowners = (int*)PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
1083   a->cowners = a->rowners + a->size + 1;
1084   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1085   ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1086   a->rowners[0] = 0;
1087   for (i=2; i<=a->size; i++) {
1088     a->rowners[i] += a->rowners[i-1];
1089   }
1090   a->rstart = a->rowners[a->rank];
1091   a->rend   = a->rowners[a->rank+1];
1092   ierr      = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1093   a->cowners[0] = 0;
1094   for (i=2; i<=a->size; i++) {
1095     a->cowners[i] += a->cowners[i-1];
1096   }
1097 
1098   ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A);CHKERRQ(ierr);
1099   PLogObjectParent(mat,a->A);
1100 
1101   /* build cache (stash) for off-processor entries generated by MatSetValues() */
1102   a->donotstash = 0;
1103   ierr = MatStashCreate_Private(comm,1,&mat->stash);CHKERRQ(ierr);
1104 
1105   /* stuff used for matrix vector multiply */
1106   a->lvec        = 0;
1107   a->Mvctx       = 0;
1108   a->roworiented = 1;
1109 
1110   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1111                                      "MatGetDiagonalBlock_MPIDense",
1112                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1113 
1114   *A = mat;
1115   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
1116   if (flg) {
1117     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
1118   }
1119   PetscFunctionReturn(0);
1120 }
1121 
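/*
   A minimal usage sketch (guarded by #if 0) for MatCreateMPIDense(): create
   the matrix, fill the locally owned rows, and assemble.  The sizes, the
   entry formula, and the name CreateExample are assumptions for illustration.
*/
#if 0
static int CreateExample(MPI_Comm comm,Mat *B)
{
  int    ierr,i,j,rstart,rend,M = 10,N = 10;
  Scalar v;
  Mat    A;

  ierr = MatCreateMPIDense(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    for (j=0; j<N; j++) {
      v    = 1.0/(i + j + 1);
      ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *B   = A;
  return 0;
}
#endif
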
1122 #undef __FUNC__
1123 #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPIDense"
1124 static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1125 {
1126   Mat          mat;
1127   Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1128   int          ierr;
1129   FactorCtx    *factor;
1130 
1131   PetscFunctionBegin;
1132   *newmat       = 0;
1133   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView);
1134   PLogObjectCreate(mat);
1135   mat->data         = (void*)(a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
1136   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1137   mat->factor       = A->factor;
1138   mat->assembled    = PETSC_TRUE;
1139 
1140   a->m = mat->m = oldmat->m;
1141   a->n = mat->n = oldmat->n;
1142   a->M = mat->M = oldmat->M;
1143   a->N = mat->N = oldmat->N;
1144   if (oldmat->factor) {
1145     a->factor = (FactorCtx*)(factor = PetscNew(FactorCtx));CHKPTRQ(factor);
1146     /* copy factor contents ... add this code! */
1147   } else a->factor = 0;
1148 
1149   a->rstart       = oldmat->rstart;
1150   a->rend         = oldmat->rend;
1151   a->size         = oldmat->size;
1152   a->rank         = oldmat->rank;
1153   mat->insertmode = NOT_SET_VALUES;
1154   a->nvec         = oldmat->nvec;
1155   a->donotstash   = oldmat->donotstash;
1156   a->rowners = (int*)PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners);
1157   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1158   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
1159   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1160 
1161   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1162   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1163   PLogObjectParent(mat,a->A);
1164   *newmat = mat;
1165   PetscFunctionReturn(0);
1166 }
1167 
1168 #include "petscsys.h"
1169 
1170 #undef __FUNC__
1171 #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPIDense_DenseInFile"
1172 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
1173 {
1174   int        *rowners,i,size,rank,m,ierr,nz,j;
1175   Scalar     *array,*vals,*vals_ptr;
1176   MPI_Status status;
1177 
1178   PetscFunctionBegin;
1179   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1180   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1181 
1182   /* determine ownership of all rows */
1183   m          = M/size + ((M % size) > rank);
1184   rowners    = (int*)PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1185   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1186   rowners[0] = 0;
1187   for (i=2; i<=size; i++) {
1188     rowners[i] += rowners[i-1];
1189   }
1190 
1191   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1192   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1193 
1194   if (!rank) {
1195     vals = (Scalar*)PetscMalloc(m*N*sizeof(Scalar));CHKPTRQ(vals);
1196 
1197     /* read in my part of the matrix numerical values  */
1198     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1199 
1200     /* insert into matrix by row (this is why we cannot read directly into the array) */
1201     vals_ptr = vals;
1202     for (i=0; i<m; i++) {
1203       for (j=0; j<N; j++) {
1204         array[i + j*m] = *vals_ptr++;
1205       }
1206     }
1207 
1208     /* read in the other processors' parts and ship them out */
1209     for (i=1; i<size; i++) {
1210       nz   = (rowners[i+1] - rowners[i])*N;
1211       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1212       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1213     }
1214   } else {
1215     /* receive numeric values */
1216     vals = (Scalar*)PetscMalloc(m*N*sizeof(Scalar));CHKPTRQ(vals);
1217 
1218     /* receive message of values*/
1219     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1220 
1221     /* insert into matrix by row (this is why we cannot read directly into the array) */
1222     vals_ptr = vals;
1223     for (i=0; i<m; i++) {
1224       for (j=0; j<N; j++) {
1225         array[i + j*m] = *vals_ptr++;
1226       }
1227     }
1228   }
1229   ierr = PetscFree(rowners);CHKERRQ(ierr);
1230   ierr = PetscFree(vals);CHKERRQ(ierr);
1231   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1232   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 
1237 #undef __FUNC__
1238 #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPIDense"
1239 int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
1240 {
1241   Mat          A;
1242   Scalar       *vals,*svals;
1243   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1244   MPI_Status   status;
1245   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1246   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1247   int          tag = ((PetscObject)viewer)->tag;
1248   int          i,nz,ierr,j,rstart,rend,fd;
1249 
1250   PetscFunctionBegin;
1251   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1252   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1253   if (!rank) {
1254     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1255     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1256     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1257   }
1258 
1259   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1260   M = header[1]; N = header[2]; nz = header[3];
1261 
1262   /*
1263        Handle case where matrix is stored on disk as a dense matrix
1264   */
1265   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1266     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1267     PetscFunctionReturn(0);
1268   }
1269 
1270   /* determine ownership of all rows */
1271   m          = M/size + ((M % size) > rank);
1272   rowners    = (int*)PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1273   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1274   rowners[0] = 0;
1275   for (i=2; i<=size; i++) {
1276     rowners[i] += rowners[i-1];
1277   }
1278   rstart = rowners[rank];
1279   rend   = rowners[rank+1];
1280 
1281   /* distribute row lengths to all processors */
1282   ourlens = (int*)PetscMalloc(2*(rend-rstart)*sizeof(int));CHKPTRQ(ourlens);
1283   offlens = ourlens + (rend-rstart);
1284   if (!rank) {
1285     rowlengths = (int*)PetscMalloc(M*sizeof(int));CHKPTRQ(rowlengths);
1286     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1287     sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
1288     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1289     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1290     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1291   } else {
1292     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1293   }
1294 
1295   if (!rank) {
1296     /* calculate the number of nonzeros on each processor */
1297     procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
1298     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1299     for (i=0; i<size; i++) {
1300       for (j=rowners[i]; j< rowners[i+1]; j++) {
1301         procsnz[i] += rowlengths[j];
1302       }
1303     }
1304     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1305 
1306     /* determine max buffer needed and allocate it */
1307     maxnz = 0;
1308     for (i=0; i<size; i++) {
1309       maxnz = PetscMax(maxnz,procsnz[i]);
1310     }
1311     cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);
1312 
1313     /* read in my part of the matrix column indices  */
1314     nz = procsnz[0];
1315     mycols = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(mycols);
1316     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1317 
1318     /* read in everyone else's parts and ship them off */
1319     for (i=1; i<size; i++) {
1320       nz   = procsnz[i];
1321       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1322       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1323     }
1324     ierr = PetscFree(cols);CHKERRQ(ierr);
1325   } else {
1326     /* determine buffer space needed for message */
1327     nz = 0;
1328     for (i=0; i<m; i++) {
1329       nz += ourlens[i];
1330     }
1331     mycols = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(mycols);
1332 
1333     /* receive message of column indices*/
1334     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1335     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1336     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1337   }
1338 
1339   /* loop over local rows, determining the number of off-diagonal entries */
1340   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1341   jj = 0;
1342   for (i=0; i<m; i++) {
1343     for (j=0; j<ourlens[i]; j++) {
1344       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1345       jj++;
1346     }
1347   }
1348 
1349   /* create our matrix */
1350   for (i=0; i<m; i++) {
1351     ourlens[i] -= offlens[i];
1352   }
1353   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1354   A = *newmat;
1355   for (i=0; i<m; i++) {
1356     ourlens[i] += offlens[i];
1357   }
1358 
1359   if (!rank) {
1360     vals = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(vals);
1361 
1362     /* read in my part of the matrix numerical values  */
1363     nz = procsnz[0];
1364     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1365 
1366     /* insert into matrix */
1367     jj      = rstart;
1368     smycols = mycols;
1369     svals   = vals;
1370     for (i=0; i<m; i++) {
1371       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1372       smycols += ourlens[i];
1373       svals   += ourlens[i];
1374       jj++;
1375     }
1376 
1377     /* read in the other processors' parts and ship them out */
1378     for (i=1; i<size; i++) {
1379       nz   = procsnz[i];
1380       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1381       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1382     }
1383     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1384   } else {
1385     /* receive numeric values */
1386     vals = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(vals);
1387 
1388     /* receive message of values*/
1389     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1390     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1391     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1392 
1393     /* insert into matrix */
1394     jj      = rstart;
1395     smycols = mycols;
1396     svals   = vals;
1397     for (i=0; i<m; i++) {
1398       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1399       smycols += ourlens[i];
1400       svals   += ourlens[i];
1401       jj++;
1402     }
1403   }
1404   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1405   ierr = PetscFree(vals);CHKERRQ(ierr);
1406   ierr = PetscFree(mycols);CHKERRQ(ierr);
1407   ierr = PetscFree(rowners);CHKERRQ(ierr);
1408 
1409   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1410   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1411   PetscFunctionReturn(0);
1412 }
1413 
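/*
   A minimal usage sketch (guarded by #if 0) for loading an MPIDense matrix
   from a PETSc binary file via MatLoad_MPIDense() above.  ViewerBinaryOpen()
   and BINARY_RDONLY are assumed to be the binary-viewer names of this PETSc
   generation, and the file name is a placeholder.
*/
#if 0
static int LoadExample(MPI_Comm comm,Mat *A)
{
  int    ierr;
  Viewer viewer;

  ierr = ViewerBinaryOpen(comm,"matrix.dat",BINARY_RDONLY,&viewer);CHKERRQ(ierr);
  ierr = MatLoad(viewer,MATMPIDENSE,A);CHKERRQ(ierr);
  ierr = ViewerDestroy(viewer);CHKERRQ(ierr);
  return 0;
}
#endif
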
1414 
1415 
1416 
1417 
1418