xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 35d8aa7f675fa4ee5c80aa031e65d3ef307a4891)
1 /*$Id: mpidense.c,v 1.145 2000/10/24 20:25:30 bsmith Exp bsmith $*/
2 
3 /*
4    Basic functions for basic parallel dense matrices.
5 */
6 
7 #include "src/mat/impls/dense/mpi/mpidense.h"
8 #include "src/vec/vecimpl.h"
9 
10 EXTERN_C_BEGIN
11 #undef __FUNC__
12 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPIDense"
13 int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
14 {
15   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
16   int          m = A->m,rstart = mdn->rstart,rank,ierr;
17   Scalar       *array;
18   MPI_Comm     comm;
19 
20   PetscFunctionBegin;
21   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
22 
23   /* The reuse aspect is not implemented efficiently */
24   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
25 
26   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
27   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
28   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
29   ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
30   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
31   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
32   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
33 
34   *iscopy = PETSC_TRUE;
35   PetscFunctionReturn(0);
36 }
37 EXTERN_C_END
38 
39 EXTERN int MatSetUpMultiply_MPIDense(Mat);
40 
41 #undef __FUNC__
42 #define __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIDense"
43 int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
44 {
45   Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
46   int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
47   PetscTruth   roworiented = A->roworiented;
48 
49   PetscFunctionBegin;
50   for (i=0; i<m; i++) {
51     if (idxm[i] < 0) continue;
52     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
53     if (idxm[i] >= rstart && idxm[i] < rend) {
54       row = idxm[i] - rstart;
55       if (roworiented) {
56         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
57       } else {
58         for (j=0; j<n; j++) {
59           if (idxn[j] < 0) continue;
60           if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
61           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
62         }
63       }
64     } else {
65       if (!A->donotstash) {
66         if (roworiented) {
67           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
68         } else {
69           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
70         }
71       }
72     }
73   }
74   PetscFunctionReturn(0);
75 }
76 
77 #undef __FUNC__
78 #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPIDense"
79 int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
80 {
81   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
82   int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;
83 
84   PetscFunctionBegin;
85   for (i=0; i<m; i++) {
86     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
87     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
88     if (idxm[i] >= rstart && idxm[i] < rend) {
89       row = idxm[i] - rstart;
90       for (j=0; j<n; j++) {
91         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
92         if (idxn[j] >= mat->N) {
93           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
94         }
95         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
96       }
97     } else {
98       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
99     }
100   }
101   PetscFunctionReturn(0);
102 }
103 
104 #undef __FUNC__
105 #define __FUNC__ /*<a name=""></a>*/"MatGetArray_MPIDense"
106 int MatGetArray_MPIDense(Mat A,Scalar **array)
107 {
108   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
109   int          ierr;
110 
111   PetscFunctionBegin;
112   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
113   PetscFunctionReturn(0);
114 }
115 
116 #undef __FUNC__
117 #define __FUNC__ /*<a name=""></a>*/"MatGetSubMatrix_MPIDense"
118 static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
119 {
120   Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
121   Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
122   int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols,rank;
123   Scalar       *av,*bv,*v = lmat->v;
124   Mat          newmat;
125 
126   PetscFunctionBegin;
127   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
128   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
129   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
130   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
131   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
132 
133   /* No parallel redistribution currently supported! Should really check each index set
134      to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
135      original matrix! */
136 
137   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
138   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
139 
140   /* Check submatrix call */
141   if (scall == MAT_REUSE_MATRIX) {
142     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
143     /* Really need to test rows and column sizes! */
144     newmat = *B;
145   } else {
146     /* Create and fill new matrix */
147     ierr = MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);CHKERRQ(ierr);
148   }
149 
150   /* Now extract the data pointers and do the copy, column at a time */
151   newmatd = (Mat_MPIDense*)newmat->data;
152   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
153 
154   for (i=0; i<ncols; i++) {
155     av = v + nlrows*icol[i];
156     for (j=0; j<nrows; j++) {
157       *bv++ = av[irow[j] - rstart];
158     }
159   }
160 
161   /* Assemble the matrices so that the correct flags are set */
162   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
163   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
164 
165   /* Free work space */
166   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
167   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
168   *B = newmat;
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNC__
173 #define __FUNC__ /*<a name=""></a>*/"MatRestoreArray_MPIDense"
174 int MatRestoreArray_MPIDense(Mat A,Scalar **array)
175 {
176   PetscFunctionBegin;
177   PetscFunctionReturn(0);
178 }
179 
180 #undef __FUNC__
181 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPIDense"
182 int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
183 {
184   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
185   MPI_Comm     comm = mat->comm;
186   int          ierr,nstash,reallocs;
187   InsertMode   addv;
188 
189   PetscFunctionBegin;
190   /* make sure all processors are either in INSERTMODE or ADDMODE */
191   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
192   if (addv == (ADD_VALUES|INSERT_VALUES)) {
193     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
194   }
195   mat->insertmode = addv; /* in case this processor had no cache */
196 
197   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
198   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
199   PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
200   PetscFunctionReturn(0);
201 }
202 
203 #undef __FUNC__
204 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPIDense"
205 int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
206 {
207   Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
208   int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
209   Scalar       *val;
210   InsertMode   addv=mat->insertmode;
211 
212   PetscFunctionBegin;
213   /*  wait on receives */
214   while (1) {
215     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
216     if (!flg) break;
217 
218     for (i=0; i<n;) {
219       /* Now identify the consecutive vals belonging to the same row */
220       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
221       if (j < n) ncols = j-i;
222       else       ncols = n-i;
223       /* Now assemble all these values with a single function call */
224       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
225       i = j;
226     }
227   }
228   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
229 
230   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
231   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
232 
233   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
234     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
235   }
236   PetscFunctionReturn(0);
237 }
238 
239 #undef __FUNC__
240 #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPIDense"
241 int MatZeroEntries_MPIDense(Mat A)
242 {
243   int          ierr;
244   Mat_MPIDense *l = (Mat_MPIDense*)A->data;
245 
246   PetscFunctionBegin;
247   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
248   PetscFunctionReturn(0);
249 }
250 
251 #undef __FUNC__
252 #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPIDense"
253 int MatGetBlockSize_MPIDense(Mat A,int *bs)
254 {
255   PetscFunctionBegin;
256   *bs = 1;
257   PetscFunctionReturn(0);
258 }
259 
260 /* the code does not do the diagonal entries correctly unless the
261    matrix is square and the column and row owerships are identical.
262    This is a BUG. The only way to fix it seems to be to access
263    mdn->A and mdn->B directly and not through the MatZeroRows()
264    routine.
265 */
266 #undef __FUNC__
267 #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPIDense"
268 int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
269 {
270   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
271   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
272   int            *procs,*nprocs,j,idx,nsends,*work;
273   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
274   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
275   int            *lens,imdex,*lrows,*values;
276   MPI_Comm       comm = A->comm;
277   MPI_Request    *send_waits,*recv_waits;
278   MPI_Status     recv_status,*send_status;
279   IS             istmp;
280   PetscTruth     found;
281 
282   PetscFunctionBegin;
283   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
284   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
285 
286   /*  first count number of contributors to each processor */
287   nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
288   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
289   procs  = nprocs + size;
290   owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
291   for (i=0; i<N; i++) {
292     idx = rows[i];
293     found = PETSC_FALSE;
294     for (j=0; j<size; j++) {
295       if (idx >= owners[j] && idx < owners[j+1]) {
296         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
297       }
298     }
299     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
300   }
301   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
302 
303   /* inform other processors of number of messages and max length*/
304   work   = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
305   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
306   nmax   = work[rank];
307   nrecvs = work[size+rank];
308   ierr   = PetscFree(work);CHKERRQ(ierr);
309 
310   /* post receives:   */
311   rvalues    = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
312   recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
313   for (i=0; i<nrecvs; i++) {
314     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
315   }
316 
317   /* do sends:
318       1) starts[i] gives the starting index in svalues for stuff going to
319          the ith processor
320   */
321   svalues    = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
322   send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
323   starts     = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
324   starts[0]  = 0;
325   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
326   for (i=0; i<N; i++) {
327     svalues[starts[owner[i]]++] = rows[i];
328   }
329   ISRestoreIndices(is,&rows);
330 
331   starts[0] = 0;
332   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
333   count = 0;
334   for (i=0; i<size; i++) {
335     if (procs[i]) {
336       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
337     }
338   }
339   ierr = PetscFree(starts);CHKERRQ(ierr);
340 
341   base = owners[rank];
342 
343   /*  wait on receives */
344   lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
345   source = lens + nrecvs;
346   count  = nrecvs; slen = 0;
347   while (count) {
348     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
349     /* unpack receives into our local space */
350     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
351     source[imdex]  = recv_status.MPI_SOURCE;
352     lens[imdex]  = n;
353     slen += n;
354     count--;
355   }
356   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
357 
358   /* move the data into the send scatter */
359   lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
360   count = 0;
361   for (i=0; i<nrecvs; i++) {
362     values = rvalues + i*nmax;
363     for (j=0; j<lens[i]; j++) {
364       lrows[count++] = values[j] - base;
365     }
366   }
367   ierr = PetscFree(rvalues);CHKERRQ(ierr);
368   ierr = PetscFree(lens);CHKERRQ(ierr);
369   ierr = PetscFree(owner);CHKERRQ(ierr);
370   ierr = PetscFree(nprocs);CHKERRQ(ierr);
371 
372   /* actually zap the local rows */
373   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
374   PLogObjectParent(A,istmp);
375   ierr = PetscFree(lrows);CHKERRQ(ierr);
376   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
377   ierr = ISDestroy(istmp);CHKERRQ(ierr);
378 
379   /* wait on sends */
380   if (nsends) {
381     send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
382     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
383     ierr = PetscFree(send_status);CHKERRQ(ierr);
384   }
385   ierr = PetscFree(send_waits);CHKERRQ(ierr);
386   ierr = PetscFree(svalues);CHKERRQ(ierr);
387 
388   PetscFunctionReturn(0);
389 }
390 
391 #undef __FUNC__
392 #define __FUNC__ /*<a name=""></a>*/"MatMult_MPIDense"
393 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
394 {
395   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
396   int          ierr;
397 
398   PetscFunctionBegin;
399   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
400   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
401   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
402   PetscFunctionReturn(0);
403 }
404 
405 #undef __FUNC__
406 #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPIDense"
407 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
408 {
409   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
410   int          ierr;
411 
412   PetscFunctionBegin;
413   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
414   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
415   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
416   PetscFunctionReturn(0);
417 }
418 
419 #undef __FUNC__
420 #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPIDense"
421 int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
422 {
423   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
424   int          ierr;
425   Scalar       zero = 0.0;
426 
427   PetscFunctionBegin;
428   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
429   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
430   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
431   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
432   PetscFunctionReturn(0);
433 }
434 
435 #undef __FUNC__
436 #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPIDense"
437 int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
438 {
439   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
440   int          ierr;
441 
442   PetscFunctionBegin;
443   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
444   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
445   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
446   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
447   PetscFunctionReturn(0);
448 }
449 
450 #undef __FUNC__
451 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPIDense"
452 int MatGetDiagonal_MPIDense(Mat A,Vec v)
453 {
454   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
455   Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
456   int          ierr,len,i,n,m = A->m,radd;
457   Scalar       *x,zero = 0.0;
458 
459   PetscFunctionBegin;
460   ierr = VecSet(&zero,v);CHKERRQ(ierr);
461   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
462   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
463   if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
464   len  = PetscMin(a->A->m,a->A->n);
465   radd = a->rstart*m;
466   for (i=0; i<len; i++) {
467     x[i] = aloc->v[radd + i*m + i];
468   }
469   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
470   PetscFunctionReturn(0);
471 }
472 
473 #undef __FUNC__
474 #define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPIDense"
475 int MatDestroy_MPIDense(Mat mat)
476 {
477   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
478   int          ierr;
479 
480   PetscFunctionBegin;
481 
482 #if defined(PETSC_USE_LOG)
483   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
484 #endif
485   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
486   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
487   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
488   if (mdn->lvec)   VecDestroy(mdn->lvec);
489   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
490   if (mdn->factor) {
491     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
492     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
493     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
494     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
495   }
496   ierr = PetscFree(mdn);CHKERRQ(ierr);
497   PetscFunctionReturn(0);
498 }
499 
500 #undef __FUNC__
501 #define __FUNC__ /*<a name=""></a>*/"MatView_MPIDense_Binary"
502 static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
503 {
504   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
505   int          ierr;
506 
507   PetscFunctionBegin;
508   if (mdn->size == 1) {
509     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
510   }
511   else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
512   PetscFunctionReturn(0);
513 }
514 
515 #undef __FUNC__
516 #define __FUNC__ /*<a name=""></a>*/"MatView_MPIDense_ASCIIorDraworSocket"
517 static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,Viewer viewer)
518 {
519   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
520   int          ierr,format,size = mdn->size,rank = mdn->rank;
521   ViewerType   vtype;
522   PetscTruth   isascii,isdraw;
523   Viewer       sviewer;
524 
525   PetscFunctionBegin;
526   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
527   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
528   if (isascii) {
529     ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
530     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
531     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
532       MatInfo info;
533       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
534       ierr = ViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
535                    (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
536       ierr = ViewerFlush(viewer);CHKERRQ(ierr);
537       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
538       PetscFunctionReturn(0);
539     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
540       PetscFunctionReturn(0);
541     }
542   } else if (isdraw) {
543     Draw       draw;
544     PetscTruth isnull;
545 
546     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
547     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr);
548     if (isnull) PetscFunctionReturn(0);
549   }
550 
551   if (size == 1) {
552     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
553   } else {
554     /* assemble the entire matrix onto first processor. */
555     Mat          A;
556     int          M = mat->M,N = mat->N,m,row,i,nz,*cols;
557     Scalar       *vals;
558 
559     if (!rank) {
560       ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
561     } else {
562       ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
563     }
564     PLogObjectParent(mat,A);
565 
566     /* Copy the matrix ... This isn't the most efficient means,
567        but it's quick for now */
568     row = mdn->rstart; m = mdn->A->m;
569     for (i=0; i<m; i++) {
570       ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
571       ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
572       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
573       row++;
574     }
575 
576     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
577     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
578     ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
579     if (!rank) {
580       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
581     }
582     ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
583     ierr = ViewerFlush(viewer);CHKERRQ(ierr);
584     ierr = MatDestroy(A);CHKERRQ(ierr);
585   }
586   PetscFunctionReturn(0);
587 }
588 
589 #undef __FUNC__
590 #define __FUNC__ /*<a name=""></a>*/"MatView_MPIDense"
591 int MatView_MPIDense(Mat mat,Viewer viewer)
592 {
593   int        ierr;
594   PetscTruth isascii,isbinary,isdraw,issocket;
595 
596   PetscFunctionBegin;
597 
598   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
599   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
600   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
601   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
602 
603   if (isascii || issocket || isdraw) {
604     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
605   } else if (isbinary) {
606     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
607   } else {
608     SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
609   }
610   PetscFunctionReturn(0);
611 }
612 
613 #undef __FUNC__
614 #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPIDense"
615 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
616 {
617   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
618   Mat          mdn = mat->A;
619   int          ierr;
620   PetscReal    isend[5],irecv[5];
621 
622   PetscFunctionBegin;
623   info->rows_global    = (double)A->M;
624   info->columns_global = (double)A->N;
625   info->rows_local     = (double)A->m;
626   info->columns_local  = (double)A->N;
627   info->block_size     = 1.0;
628   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
629   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
630   isend[3] = info->memory;  isend[4] = info->mallocs;
631   if (flag == MAT_LOCAL) {
632     info->nz_used      = isend[0];
633     info->nz_allocated = isend[1];
634     info->nz_unneeded  = isend[2];
635     info->memory       = isend[3];
636     info->mallocs      = isend[4];
637   } else if (flag == MAT_GLOBAL_MAX) {
638     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
639     info->nz_used      = irecv[0];
640     info->nz_allocated = irecv[1];
641     info->nz_unneeded  = irecv[2];
642     info->memory       = irecv[3];
643     info->mallocs      = irecv[4];
644   } else if (flag == MAT_GLOBAL_SUM) {
645     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
646     info->nz_used      = irecv[0];
647     info->nz_allocated = irecv[1];
648     info->nz_unneeded  = irecv[2];
649     info->memory       = irecv[3];
650     info->mallocs      = irecv[4];
651   }
652   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
653   info->fill_ratio_needed = 0;
654   info->factor_mallocs    = 0;
655   PetscFunctionReturn(0);
656 }
657 
658 #undef __FUNC__
659 #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPIDense"
660 int MatSetOption_MPIDense(Mat A,MatOption op)
661 {
662   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
663   int          ierr;
664 
665   PetscFunctionBegin;
666   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
667       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
668       op == MAT_NEW_NONZERO_LOCATION_ERR ||
669       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
670       op == MAT_COLUMNS_SORTED ||
671       op == MAT_COLUMNS_UNSORTED) {
672         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
673   } else if (op == MAT_ROW_ORIENTED) {
674         a->roworiented = PETSC_TRUE;
675         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
676   } else if (op == MAT_ROWS_SORTED ||
677              op == MAT_ROWS_UNSORTED ||
678              op == MAT_SYMMETRIC ||
679              op == MAT_STRUCTURALLY_SYMMETRIC ||
680              op == MAT_YES_NEW_DIAGONALS ||
681              op == MAT_USE_HASH_TABLE) {
682     PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
683   } else if (op == MAT_COLUMN_ORIENTED) {
684     a->roworiented = PETSC_FALSE;
685     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
686   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
687     a->donotstash = PETSC_TRUE;
688   } else if (op == MAT_NO_NEW_DIAGONALS) {
689     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
690   } else {
691     SETERRQ(PETSC_ERR_SUP,"unknown option");
692   }
693   PetscFunctionReturn(0);
694 }
695 
696 #undef __FUNC__
697 #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPIDense"
698 int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
699 {
700   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
701 
702   PetscFunctionBegin;
703   if (m) *m = mat->rstart;
704   if (n) *n = mat->rend;
705   PetscFunctionReturn(0);
706 }
707 
708 #undef __FUNC__
709 #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPIDense"
710 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
711 {
712   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
713   int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;
714 
715   PetscFunctionBegin;
716   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows")
717   lrow = row - rstart;
718   ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
719   PetscFunctionReturn(0);
720 }
721 
722 #undef __FUNC__
723 #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPIDense"
724 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
725 {
726   int ierr;
727 
728   PetscFunctionBegin;
729   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
730   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
731   PetscFunctionReturn(0);
732 }
733 
734 #undef __FUNC__
735 #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPIDense"
736 int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
737 {
738   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
739   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
740   Scalar       *l,*r,x,*v;
741   int          ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;
742 
743   PetscFunctionBegin;
744   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
745   if (ll) {
746     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
747     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
748     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
749     for (i=0; i<m; i++) {
750       x = l[i];
751       v = mat->v + i;
752       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
753     }
754     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
755     PLogFlops(n*m);
756   }
757   if (rr) {
758     ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
759     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
760     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
761     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
762     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
763     for (i=0; i<n; i++) {
764       x = r[i];
765       v = mat->v + i*m;
766       for (j=0; j<m; j++) { (*v++) *= x;}
767     }
768     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
769     PLogFlops(n*m);
770   }
771   PetscFunctionReturn(0);
772 }
773 
774 #undef __FUNC__
775 #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPIDense"
776 int MatNorm_MPIDense(Mat A,NormType type,PetscReal *norm)
777 {
778   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
779   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
780   int          ierr,i,j;
781   PetscReal    sum = 0.0;
782   Scalar       *v = mat->v;
783 
784   PetscFunctionBegin;
785   if (mdn->size == 1) {
786     ierr =  MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
787   } else {
788     if (type == NORM_FROBENIUS) {
789       for (i=0; i<mdn->A->n*mdn->A->m; i++) {
790 #if defined(PETSC_USE_COMPLEX)
791         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
792 #else
793         sum += (*v)*(*v); v++;
794 #endif
795       }
796       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
797       *norm = sqrt(*norm);
798       PLogFlops(2*mdn->A->n*mdn->A->m);
799     } else if (type == NORM_1) {
800       PetscReal *tmp,*tmp2;
801       tmp  = (PetscReal*)PetscMalloc(2*A->N*sizeof(PetscReal));CHKPTRQ(tmp);
802       tmp2 = tmp + A->N;
803       ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
804       *norm = 0.0;
805       v = mat->v;
806       for (j=0; j<mdn->A->n; j++) {
807         for (i=0; i<mdn->A->m; i++) {
808           tmp[j] += PetscAbsScalar(*v);  v++;
809         }
810       }
811       ierr = MPI_Allreduce(tmp,tmp2,A->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
812       for (j=0; j<A->N; j++) {
813         if (tmp2[j] > *norm) *norm = tmp2[j];
814       }
815       ierr = PetscFree(tmp);CHKERRQ(ierr);
816       PLogFlops(A->n*A->m);
817     } else if (type == NORM_INFINITY) { /* max row norm */
818       PetscReal ntemp;
819       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
820       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
821     } else {
822       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
823     }
824   }
825   PetscFunctionReturn(0);
826 }
827 
828 #undef __FUNC__
829 #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPIDense"
830 int MatTranspose_MPIDense(Mat A,Mat *matout)
831 {
832   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
833   Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
834   Mat          B;
835   int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
836   int          j,i,ierr;
837   Scalar       *v;
838 
839   PetscFunctionBegin;
840   if (!matout && M != N) {
841     SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
842   }
843   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);
844 
845   m = a->A->m; n = a->A->n; v = Aloc->v;
846   rwork = (int*)PetscMalloc(n*sizeof(int));CHKPTRQ(rwork);
847   for (j=0; j<n; j++) {
848     for (i=0; i<m; i++) rwork[i] = rstart + i;
849     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
850     v   += m;
851   }
852   ierr = PetscFree(rwork);CHKERRQ(ierr);
853   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
854   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
855   if (matout) {
856     *matout = B;
857   } else {
858     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
859   }
860   PetscFunctionReturn(0);
861 }
862 
863 #include "petscblaslapack.h"
864 #undef __FUNC__
865 #define __FUNC__ /*<a name=""></a>*/"MatScale_MPIDense"
866 int MatScale_MPIDense(Scalar *alpha,Mat inA)
867 {
868   Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
869   Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
870   int          one = 1,nz;
871 
872   PetscFunctionBegin;
873   nz = inA->m*inA->N;
874   BLscal_(&nz,alpha,a->v,&one);
875   PLogFlops(nz);
876   PetscFunctionReturn(0);
877 }
878 
879 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
880 EXTERN int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);
881 
882 #undef __FUNC__
883 #define __FUNC__ /*<a name="MatSetUpPreallocation_MPIDense"></a>*/"MatSetUpPreallocation_MPIDense"
884 int MatSetUpPreallocation_MPIDense(Mat A)
885 {
886   int        ierr;
887 
888   PetscFunctionBegin;
889   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
890   PetscFunctionReturn(0);
891 }
892 
893 /* -------------------------------------------------------------------*/
894 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
895        MatGetRow_MPIDense,
896        MatRestoreRow_MPIDense,
897        MatMult_MPIDense,
898        MatMultAdd_MPIDense,
899        MatMultTranspose_MPIDense,
900        MatMultTransposeAdd_MPIDense,
901        0,
902        0,
903        0,
904        0,
905        0,
906        0,
907        0,
908        MatTranspose_MPIDense,
909        MatGetInfo_MPIDense,0,
910        MatGetDiagonal_MPIDense,
911        MatDiagonalScale_MPIDense,
912        MatNorm_MPIDense,
913        MatAssemblyBegin_MPIDense,
914        MatAssemblyEnd_MPIDense,
915        0,
916        MatSetOption_MPIDense,
917        MatZeroEntries_MPIDense,
918        MatZeroRows_MPIDense,
919        0,
920        0,
921        0,
922        0,
923        MatSetUpPreallocation_MPIDense,
924        0,
925        MatGetOwnershipRange_MPIDense,
926        0,
927        0,
928        MatGetArray_MPIDense,
929        MatRestoreArray_MPIDense,
930        MatDuplicate_MPIDense,
931        0,
932        0,
933        0,
934        0,
935        0,
936        MatGetSubMatrices_MPIDense,
937        0,
938        MatGetValues_MPIDense,
939        0,
940        0,
941        MatScale_MPIDense,
942        0,
943        0,
944        0,
945        MatGetBlockSize_MPIDense,
946        0,
947        0,
948        0,
949        0,
950        0,
951        0,
952        0,
953        0,
954        0,
955        MatGetSubMatrix_MPIDense,
956        MatDestroy_MPIDense,
957        MatView_MPIDense,
958        MatGetMaps_Petsc};
959 
960 EXTERN_C_BEGIN
961 #undef __FUNC__
962 #define __FUNC__ /*<a name=""></a>*/"MatCreate_MPIDense"
963 int MatCreate_MPIDense(Mat mat)
964 {
965   Mat_MPIDense *a;
966   int          ierr,i;
967 
968   PetscFunctionBegin;
969   mat->data         = (void*)(a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
970   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
971   mat->factor       = 0;
972   mat->mapping      = 0;
973 
974   a->factor       = 0;
975   mat->insertmode = NOT_SET_VALUES;
976   ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
977   ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);
978 
979   ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
980   ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
981   a->nvec = mat->n;
982 
983   /* the information in the maps duplicates the information computed below, eventually
984      we should remove the duplicate information that is not contained in the maps */
985   ierr = MapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
986   ierr = MapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);
987 
988   /* build local table of row and column ownerships */
989   a->rowners = (int*)PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
990   a->cowners = a->rowners + a->size + 1;
991   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
992   ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
993   a->rowners[0] = 0;
994   for (i=2; i<=a->size; i++) {
995     a->rowners[i] += a->rowners[i-1];
996   }
997   a->rstart = a->rowners[a->rank];
998   a->rend   = a->rowners[a->rank+1];
999   ierr      = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
1000   a->cowners[0] = 0;
1001   for (i=2; i<=a->size; i++) {
1002     a->cowners[i] += a->cowners[i-1];
1003   }
1004 
1005   /* build cache for off array entries formed */
1006   a->donotstash = PETSC_FALSE;
1007   ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);
1008 
1009   /* stuff used for matrix vector multiply */
1010   a->lvec        = 0;
1011   a->Mvctx       = 0;
1012   a->roworiented = PETSC_TRUE;
1013 
1014   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1015                                      "MatGetDiagonalBlock_MPIDense",
1016                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1017   PetscFunctionReturn(0);
1018 }
1019 EXTERN_C_END
1020 
1021 #undef __FUNC__
1022 #define __FUNC__ /*<a name=""></a>*/"MatMPIDenseSetPreallocation"
1023 /*@C
1024    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1025 
1026    Not collective
1027 
1028    Input Parameters:
1029 .  A - the matrix
1030 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1031    to control all matrix memory allocation.
1032 
1033    Notes:
1034    The dense format is fully compatible with standard Fortran 77
1035    storage by columns.
1036 
1037    The data input variable is intended primarily for Fortran programmers
1038    who wish to allocate their own matrix memory space.  Most users should
1039    set data=PETSC_NULL.
1040 
1041    Level: intermediate
1042 
1043 .keywords: matrix,dense, parallel
1044 
1045 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1046 @*/
1047 int MatMPIDenseSetPreallocation(Mat mat,Scalar *data)
1048 {
1049   Mat_MPIDense *a;
1050   int          ierr;
1051   PetscTruth   flg2;
1052 
1053   PetscFunctionBegin;
1054   ierr = PetscTypeCompare((PetscObject)mat,MATMPIDENSE,&flg2);CHKERRQ(ierr);
1055   if (!flg2) PetscFunctionReturn(0);
1056   mat->preallocated = PETSC_TRUE;
1057   /* Note:  For now, when data is specified above, this assumes the user correctly
1058    allocates the local dense storage space.  We should add error checking. */
1059 
1060   a    = (Mat_MPIDense*)mat->data;
1061   ierr = MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);CHKERRQ(ierr);
1062   PLogObjectParent(mat,a->A);
1063   PetscFunctionReturn(0);
1064 }
1065 
1066 #undef __FUNC__
1067 #define __FUNC__ /*<a name=""></a>*/"MatCreateMPIDense"
1068 /*@C
1069    MatCreateMPIDense - Creates a sparse parallel matrix in dense format.
1070 
1071    Collective on MPI_Comm
1072 
1073    Input Parameters:
1074 +  comm - MPI communicator
1075 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1076 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1077 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1078 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1079 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1080    to control all matrix memory allocation.
1081 
1082    Output Parameter:
1083 .  A - the matrix
1084 
1085    Notes:
1086    The dense format is fully compatible with standard Fortran 77
1087    storage by columns.
1088 
1089    The data input variable is intended primarily for Fortran programmers
1090    who wish to allocate their own matrix memory space.  Most users should
1091    set data=PETSC_NULL.
1092 
1093    The user MUST specify either the local or global matrix dimensions
1094    (possibly both).
1095 
1096    Level: intermediate
1097 
1098 .keywords: matrix,dense, parallel
1099 
1100 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1101 @*/
1102 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
1103 {
1104   int ierr,size;
1105 
1106   PetscFunctionBegin;
1107   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1108   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1109   if (size > 1) {
1110     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1111     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1112   } else {
1113     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1114     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1115   }
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 #undef __FUNC__
1120 #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPIDense"
1121 static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1122 {
1123   Mat          mat;
1124   Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1125   int          ierr;
1126   FactorCtx    *factor;
1127 
1128   PetscFunctionBegin;
1129   *newmat       = 0;
1130   ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
1131   ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr);
1132   mat->data         = (void*)(a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
1133   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1134   mat->factor       = A->factor;
1135   mat->assembled    = PETSC_TRUE;
1136   mat->preallocated = PETSC_TRUE;
1137 
1138   if (oldmat->factor) {
1139     a->factor = (FactorCtx*)(factor = PetscNew(FactorCtx));CHKPTRQ(factor);
1140     /* copy factor contents ... add this code! */
1141   } else a->factor = 0;
1142 
1143   a->rstart       = oldmat->rstart;
1144   a->rend         = oldmat->rend;
1145   a->size         = oldmat->size;
1146   a->rank         = oldmat->rank;
1147   mat->insertmode = NOT_SET_VALUES;
1148   a->nvec         = oldmat->nvec;
1149   a->donotstash   = oldmat->donotstash;
1150   a->rowners = (int*)PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners);
1151   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1152   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
1153   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1154 
1155   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1156   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1157   PLogObjectParent(mat,a->A);
1158   *newmat = mat;
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 #include "petscsys.h"
1163 
1164 #undef __FUNC__
1165 #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPIDense_DenseInFile"
1166 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
1167 {
1168   int        *rowners,i,size,rank,m,ierr,nz,j;
1169   Scalar     *array,*vals,*vals_ptr;
1170   MPI_Status status;
1171 
1172   PetscFunctionBegin;
1173   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1174   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1175 
1176   /* determine ownership of all rows */
1177   m          = M/size + ((M % size) > rank);
1178   rowners    = (int*)PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1179   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1180   rowners[0] = 0;
1181   for (i=2; i<=size; i++) {
1182     rowners[i] += rowners[i-1];
1183   }
1184 
1185   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1186   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1187 
1188   if (!rank) {
1189     vals = (Scalar*)PetscMalloc(m*N*sizeof(Scalar));CHKPTRQ(vals);
1190 
1191     /* read in my part of the matrix numerical values  */
1192     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1193 
1194     /* insert into matrix-by row (this is why cannot directly read into array */
1195     vals_ptr = vals;
1196     for (i=0; i<m; i++) {
1197       for (j=0; j<N; j++) {
1198         array[i + j*m] = *vals_ptr++;
1199       }
1200     }
1201 
1202     /* read in other processors and ship out */
1203     for (i=1; i<size; i++) {
1204       nz   = (rowners[i+1] - rowners[i])*N;
1205       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1206       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1207     }
1208   } else {
1209     /* receive numeric values */
1210     vals = (Scalar*)PetscMalloc(m*N*sizeof(Scalar));CHKPTRQ(vals);
1211 
1212     /* receive message of values*/
1213     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1214 
1215     /* insert into matrix-by row (this is why cannot directly read into array */
1216     vals_ptr = vals;
1217     for (i=0; i<m; i++) {
1218       for (j=0; j<N; j++) {
1219         array[i + j*m] = *vals_ptr++;
1220       }
1221     }
1222   }
1223   ierr = PetscFree(rowners);CHKERRQ(ierr);
1224   ierr = PetscFree(vals);CHKERRQ(ierr);
1225   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1226   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1227   PetscFunctionReturn(0);
1228 }
1229 
1230 EXTERN_C_BEGIN
1231 #undef __FUNC__
1232 #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPIDense"
1233 int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
1234 {
1235   Mat          A;
1236   Scalar       *vals,*svals;
1237   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1238   MPI_Status   status;
1239   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1240   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1241   int          tag = ((PetscObject)viewer)->tag;
1242   int          i,nz,ierr,j,rstart,rend,fd;
1243 
1244   PetscFunctionBegin;
1245   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1246   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1247   if (!rank) {
1248     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1249     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1250     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1251   }
1252 
1253   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1254   M = header[1]; N = header[2]; nz = header[3];
1255 
1256   /*
1257        Handle case where matrix is stored on disk as a dense matrix
1258   */
1259   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1260     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1261     PetscFunctionReturn(0);
1262   }
1263 
1264   /* determine ownership of all rows */
1265   m          = M/size + ((M % size) > rank);
1266   rowners    = (int*)PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1267   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1268   rowners[0] = 0;
1269   for (i=2; i<=size; i++) {
1270     rowners[i] += rowners[i-1];
1271   }
1272   rstart = rowners[rank];
1273   rend   = rowners[rank+1];
1274 
1275   /* distribute row lengths to all processors */
1276   ourlens = (int*)PetscMalloc(2*(rend-rstart)*sizeof(int));CHKPTRQ(ourlens);
1277   offlens = ourlens + (rend-rstart);
1278   if (!rank) {
1279     rowlengths = (int*)PetscMalloc(M*sizeof(int));CHKPTRQ(rowlengths);
1280     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1281     sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
1282     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1283     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1284     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1285   } else {
1286     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1287   }
1288 
1289   if (!rank) {
1290     /* calculate the number of nonzeros on each processor */
1291     procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
1292     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1293     for (i=0; i<size; i++) {
1294       for (j=rowners[i]; j< rowners[i+1]; j++) {
1295         procsnz[i] += rowlengths[j];
1296       }
1297     }
1298     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1299 
1300     /* determine max buffer needed and allocate it */
1301     maxnz = 0;
1302     for (i=0; i<size; i++) {
1303       maxnz = PetscMax(maxnz,procsnz[i]);
1304     }
1305     cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);
1306 
1307     /* read in my part of the matrix column indices  */
1308     nz = procsnz[0];
1309     mycols = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(mycols);
1310     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1311 
1312     /* read in every one elses and ship off */
1313     for (i=1; i<size; i++) {
1314       nz   = procsnz[i];
1315       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1316       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1317     }
1318     ierr = PetscFree(cols);CHKERRQ(ierr);
1319   } else {
1320     /* determine buffer space needed for message */
1321     nz = 0;
1322     for (i=0; i<m; i++) {
1323       nz += ourlens[i];
1324     }
1325     mycols = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(mycols);
1326 
1327     /* receive message of column indices*/
1328     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1329     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1330     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1331   }
1332 
1333   /* loop over local rows, determining number of off diagonal entries */
1334   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1335   jj = 0;
1336   for (i=0; i<m; i++) {
1337     for (j=0; j<ourlens[i]; j++) {
1338       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1339       jj++;
1340     }
1341   }
1342 
1343   /* create our matrix */
1344   for (i=0; i<m; i++) {
1345     ourlens[i] -= offlens[i];
1346   }
1347   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1348   A = *newmat;
1349   for (i=0; i<m; i++) {
1350     ourlens[i] += offlens[i];
1351   }
1352 
1353   if (!rank) {
1354     vals = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(vals);
1355 
1356     /* read in my part of the matrix numerical values  */
1357     nz = procsnz[0];
1358     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1359 
1360     /* insert into matrix */
1361     jj      = rstart;
1362     smycols = mycols;
1363     svals   = vals;
1364     for (i=0; i<m; i++) {
1365       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1366       smycols += ourlens[i];
1367       svals   += ourlens[i];
1368       jj++;
1369     }
1370 
1371     /* read in other processors and ship out */
1372     for (i=1; i<size; i++) {
1373       nz   = procsnz[i];
1374       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1375       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1376     }
1377     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1378   } else {
1379     /* receive numeric values */
1380     vals = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(vals);
1381 
1382     /* receive message of values*/
1383     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1384     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1385     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1386 
1387     /* insert into matrix */
1388     jj      = rstart;
1389     smycols = mycols;
1390     svals   = vals;
1391     for (i=0; i<m; i++) {
1392       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1393       smycols += ourlens[i];
1394       svals   += ourlens[i];
1395       jj++;
1396     }
1397   }
1398   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1399   ierr = PetscFree(vals);CHKERRQ(ierr);
1400   ierr = PetscFree(mycols);CHKERRQ(ierr);
1401   ierr = PetscFree(rowners);CHKERRQ(ierr);
1402 
1403   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1404   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1405   PetscFunctionReturn(0);
1406 }
1407 EXTERN_C_END
1408 
1409 
1410 
1411 
1412 
1413