/*$Id: mpidense.c,v 1.138 2000/05/04 14:04:46 balay Exp balay $*/

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatGetDiagonalBlock_MPIDense"
int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  int          m = mdn->m,rstart = mdn->rstart,rank,ierr;
  Scalar       *array;
  MPI_Comm     comm;

  PetscFunctionBegin;
  if (mdn->M != mdn->N) SETERRQ(PETSC_ERR_SUP,0,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

extern int MatSetUpMultiply_MPIDense(Mat);

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
  int          roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

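/*
   Usage sketch (illustrative, not part of the original source): entries for
   rows owned by another process are cached in the stash and communicated
   during assembly, so a typical caller (with mat, row, col, and value
   already defined) looks like:

     ierr = MatSetValues(mat,1,&row,1,&col,&value,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/
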
#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= mdn->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIDense"
static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
  int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols,rank;
  Scalar       *av,*bv,*v = lmat->v;
  Mat          newmat;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index
     set to confirm that it is OK.  Currently supports only submatrices with the
     same partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,0,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreateMPIDense(A->comm,nrows,ncols,PETSC_DECIDE,PETSC_DECIDE,PETSC_NULL,&newmat);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

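/*
   Illustrative sketch (hypothetical caller variables; rstart/rend as from
   MatGetOwnershipRange): because no redistribution is done, isrow must select
   rows each process already owns, e.g. all local rows and the first two
   global columns:

     IS rows,cols;
     ierr = ISCreateStride(PETSC_COMM_SELF,rend-rstart,rstart,1,&rows);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_SELF,2,0,1,&cols);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,rows,cols,PETSC_DECIDE,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
*/
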
#undef __FUNC__
#define __FUNC__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  Scalar       *val;
  InsertMode   addv=mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note */
  for (i=0; i<N; i++) {
    idx = rows[i];
    found = 0;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work   = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
  ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  nrecvs = work[size+rank];
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives:   */
  rvalues    = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues    = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
  send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr        = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

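/*
   Usage sketch (illustrative; nlocal and globalrows are hypothetical caller
   variables listing the global row numbers this process wants zeroed):

     IS     zrows;
     Scalar one = 1.0;
     ierr = ISCreateGeneral(PETSC_COMM_SELF,nlocal,globalrows,&zrows);CHKERRQ(ierr);
     ierr = MatZeroRows(A,zrows,&one);CHKERRQ(ierr);  /* zero the rows, put 1.0 on the diagonal */
     ierr = ISDestroy(zrows);CHKERRQ(ierr);
*/
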
#undef __FUNC__
#define __FUNC__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

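/*
   How the parallel multiply works: each process stores complete rows of A
   (an m x N local block), so computing its piece of y = A*x requires all of
   x.  The scatter context Mvctx gathers the distributed x into the
   sequential work vector lvec, and a purely local dense multiply then
   produces the local part of y.
*/
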
#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTranspose_MPIDense"
int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;
  Scalar       zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransposeAdd_MPIDense"
int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
  int          ierr,len,i,n,m = a->m,radd;
  Scalar       *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
  len = PetscMin(aloc->m,aloc->n);
  radd = a->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

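/*
   Note on the index arithmetic above: the local array aloc->v is stored
   column-major (Fortran order), so local entry (i,j) lives at v[i + j*m].
   The diagonal entry of local row i sits in global column rstart+i, giving
   v[i + (rstart+i)*m] = v[rstart*m + i*m + i], i.e. the radd + i*m + i
   used in the loop.
*/
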
#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
  if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_ASCIIorDraworSocket"
static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr,format,size = mdn->size,rank = mdn->rank;
  ViewerType   vtype;
  PetscTruth   isascii,isdraw;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
                   (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      ierr = ViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    Draw       draw;
    PetscTruth isnull;

    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor */
    Mat          A;
    int          M = mdn->M,N = mdn->N,m,row,i,nz,*cols;
    Scalar       *vals;
    Mat_SeqDense *Amdn = (Mat_SeqDense*)mdn->A->data;

    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = Amdn->m;
    for (i=0; i<m; i++) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (!rank) {
      Viewer sviewer;
      ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
      ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    }
    ierr = ViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,Viewer viewer)
{
  int        ierr;
  PetscTruth isascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);

  if (isascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

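/*
   Usage sketch (illustrative): any of the supported viewers may be passed
   in, for example printing to standard output on all processes:

     ierr = MatView(mat,VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/
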
#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  Mat          mdn = mat->A;
  int          ierr;
  PetscReal    isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_NEW_NONZERO_LOCATION_ERR ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_COLUMNS_UNSORTED) {
    MatSetOption(a->A,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS ||
             op == MAT_USE_HASH_TABLE) {
    PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0; MatSetOption(a->A,op);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIDense"
int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIDense"
int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIDense"
int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  if (m) *m = mat->rstart;
  if (n) *n = mat->rend;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  int ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIDense"
int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  Scalar       *l,*r,x,*v;
  int          ierr,i,j,s2a,s3a,s2,s3,m=mat->m,n=mat->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Left scaling vector non-conforming local size");
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    PLogFlops(n*m);
  }
  if (rr) {
    ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Right scaling vec non-conforming local size");
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    PLogFlops(n*m);
  }
  PetscFunctionReturn(0);
}

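/*
   Usage sketch (illustrative): scales A in place as A <- diag(ll)*A*diag(rr),
   where ll is distributed like the rows of A and rr like the columns:

     ierr = MatDiagonalScale(A,ll,rr);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,PETSC_NULL,rr);CHKERRQ(ierr);  /* column scaling only */
*/
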
#undef __FUNC__
#define __FUNC__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,PetscReal *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  int          ierr,i,j;
  PetscReal    sum = 0.0;
  Scalar       *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mat->n*mat->m; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      tmp  = (PetscReal*)PetscMalloc(2*mdn->N*sizeof(PetscReal));CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      ierr = PetscMemzero(tmp,2*mdn->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = mat->v;
      for (j=0; j<mat->n; j++) {
        for (i=0; i<mat->m; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      for (j=0; j<mdn->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      PLogFlops(mat->n*mat->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

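/*
   Norms computed above, with a_ij the entries of A:

     NORM_FROBENIUS:  ||A||_F   = sqrt(sum_ij |a_ij|^2)   (local sums, then MPI_SUM)
     NORM_1:          ||A||_1   = max_j sum_i |a_ij|      (column sums, MPI_SUM, then max)
     NORM_INFINITY:   ||A||_inf = max_i sum_j |a_ij|      (local row norms, then MPI_MAX)

   NORM_INFINITY can be computed locally first because each process stores
   complete rows.
*/
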
#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
  Mat          B;
  int          M = a->M,N = a->N,m,n,*rwork,rstart = a->rstart;
  int          j,i,ierr;
  Scalar       *v;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = Aloc->m; n = Aloc->n; v = Aloc->v;
  /* the work array holds the m global row indices, which become column indices of B */
  rwork = (int*)PetscMalloc(m*sizeof(int));CHKPTRQ(rwork);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    PetscOps *Abops;
    MatOps   Aops;

    /* This isn't really an in-place transpose; we free the data structures of a */
    ierr = PetscFree(a->rowners);CHKERRQ(ierr);
    ierr = MatDestroy(a->A);CHKERRQ(ierr);
    if (a->lvec)  {ierr = VecDestroy(a->lvec);CHKERRQ(ierr);}
    if (a->Mvctx) {ierr = VecScatterDestroy(a->Mvctx);CHKERRQ(ierr);}
    ierr = PetscFree(a);CHKERRQ(ierr);

    /*
         This is horrible, horrible code. We need to keep the
      A pointers for the bops and ops but copy everything
      else from B.
    */
    Abops   = A->bops;
    Aops    = A->ops;
    ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
    A->bops = Abops;
    A->ops  = Aops;

    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#include "pinclude/blaslapack.h"
#undef __FUNC__
#define __FUNC__ "MatScale_MPIDense"
int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
  int          one = 1,nz;

  PetscFunctionBegin;
  nz = a->m*a->n;
  BLscal_(&nz,alpha,a->v,&one);
  PLogFlops(nz);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
       MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
       MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
       MatGetSize_MPIDense,
       MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
       MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
       0,
       MatScale_MPIDense,
       0,
       0,
       0,
       MatGetBlockSize_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrix_MPIDense,
       0,
       0,
       MatGetMaps_Petsc};

#undef __FUNC__
#define __FUNC__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr,i;
  PetscTruth   flg;

  PetscFunctionBegin;
  /* Note:  For now, when data is specified above, this assumes the user correctly
   allocates the local dense storage space.  We should add error checking. */

  *A = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void*)(a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = 0;
  mat->mapping      = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  a->nvec = n;

  /* each process stores all columns of its local rows */
  a->N = mat->N = N;
  a->M = mat->M = M;
  a->m = mat->m = m;
  a->n = mat->n = N;   /* NOTE: n == N */

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  a->rowners = (int*)PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr      = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->cowners[i] += a->cowners[i-1];
  }

  ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off-processor entries formed during assembly */
  a->donotstash = 0;
  ierr = MatStashCreate_Private(comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = 1;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIDense",
                                     MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);

  *A = mat;
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

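/*
   Usage sketch (illustrative): create a 100 x 100 parallel dense matrix,
   letting PETSc choose the row distribution and allocate the storage:

     Mat A;
     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
                              100,100,PETSC_NULL,&A);CHKERRQ(ierr);
     ... set values and assemble ...
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/
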
#undef __FUNC__
#define __FUNC__ "MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
  int          ierr;
  FactorCtx    *factor;

  PetscFunctionBegin;
  *newmat       = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void*)(a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx*)(factor = PetscNew(FactorCtx));CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;
  a->rowners = (int*)PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
{
  int        *rowners,i,size,rank,m,ierr,nz,j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int*)PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar*)PetscMalloc(m*N*sizeof(Scalar));CHKPTRQ(vals);

    /* read in my part of the matrix numerical values  */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    vals = (Scalar*)PetscMalloc(m*N*sizeof(Scalar));CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

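/*
   Layout note for the routine above: the file stores each block of rows in
   row-major order while the in-memory dense array is column-major, so with
   m local rows and N columns the copy performs array[i + j*m] = vals[i*N + j];
   that is why the values cannot be read directly into the matrix array.
*/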

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense"
int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;
  int          i,nz,ierr,j,rstart,rend,fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int*)PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*)PetscMalloc(2*(rend-rstart)*sizeof(int));CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*)PetscMalloc(M*sizeof(int));CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
    ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    mycols = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    mycols = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(mycols);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with the file");
  }

  /* loop over local rows, determining the number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(vals);

    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    vals = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with the file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}