xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision b5758dff40dc4232f7fa9713b5481f7e0e0d5d24)
1 /*$Id: mpidense.c,v 1.149 2001/01/19 23:20:28 balay Exp bsmith $*/
2 
3 /*
4    Basic functions for basic parallel dense matrices.
5 */
6 
7 #include "src/mat/impls/dense/mpi/mpidense.h"
8 #include "src/vec/vecimpl.h"
9 
10 EXTERN_C_BEGIN
11 #undef __FUNC__
12 #define __FUNC__ "MatGetDiagonalBlock_MPIDense"
int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  /* Return this process's diagonal block of the (square) parallel dense
     matrix as a sequential dense matrix that shares the parent's storage. */
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  int          m = A->m,rstart = mdn->rstart,rank,ierr;
  Scalar       *array;
  MPI_Comm     comm;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  /* local storage is column-major with leading dimension m; the diagonal
     block starts at column rstart, i.e. at offset m*rstart into the array */
  ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* NOTE(review): *B points into A's storage, yet *iscopy reports TRUE;
     the caller is expected to destroy *B -- confirm intended semantics */
  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
37 EXTERN_C_END
38 
39 EXTERN int MatSetUpMultiply_MPIDense(Mat);
40 
41 #undef __FUNC__
42 #define __FUNC__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  /* Insert or add the m x n block of values v (global indices idxm/idxn).
     Locally-owned rows go straight into the sequential block; off-process
     rows are stashed for communication during assembly. */
  Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
  PetscTruth   roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;   /* negative row indices are silently skipped */
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      /* locally owned row: convert to local numbering */
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v is row-major: row i of the block starts at v + i*n */
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* v is column-major: entry (i,j) of the block lives at v + i + j*m */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* off-process row: stash for assembly unless the user disabled it
         via MAT_IGNORE_OFF_PROC_ENTRIES */
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
76 
77 #undef __FUNC__
78 #define __FUNC__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  /* Fetch the m x n block of entries at global indices idxm/idxn into v
     (row-major).  Only rows owned by this process may be requested;
     unlike MatSetValues, negative indices are errors here. */
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      /* locally owned row: convert to local numbering and fetch entry-wise */
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
103 
104 #undef __FUNC__
105 #define __FUNC__ "MatGetArray_MPIDense"
106 int MatGetArray_MPIDense(Mat A,Scalar **array)
107 {
108   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
109   int          ierr;
110 
111   PetscFunctionBegin;
112   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
113   PetscFunctionReturn(0);
114 }
115 
116 #undef __FUNC__
117 #define __FUNC__ "MatGetSubMatrix_MPIDense"
static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
{
  /* Extract the submatrix A(isrow,iscol) into a new (or reused) parallel
     dense matrix.  Only works when the submatrix keeps the same row
     partitioning as A; the index sets are NOT validated. */
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
  int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols,rank;
  Scalar       *av,*bv,*v = lmat->v;
  Mat          newmat;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    /* column icol[i] of the local storage (leading dimension nlrows) */
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      /* irow[] carries global row numbers; shift by rstart to index locally */
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}
171 
172 #undef __FUNC__
173 #define __FUNC__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  /* Nothing to do: MatGetArray_MPIDense hands out a pointer into the
     matrix's own storage, so there is no copy to write back or free. */
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
179 
180 #undef __FUNC__
181 #define __FUNC__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  /* Begin assembly: verify every rank agrees on ADD vs INSERT mode and
     start shipping stashed off-process values to their owning ranks. */
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  /* the bitwise OR contains both flag bits iff ranks used different modes */
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}
202 
203 #undef __FUNC__
204 #define __FUNC__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  /* Finish assembly: drain incoming stash messages, insert the received
     values locally, then assemble the sequential block.  On the first
     final assembly the scatter context used by the mult routines is built. */
  Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  Scalar       *val;
  InsertMode   addv=mat->insertmode;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;   /* no more incoming messages */

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: build the scatter needed by MatMult et al. */
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
238 
239 #undef __FUNC__
240 #define __FUNC__ "MatZeroEntries_MPIDense"
241 int MatZeroEntries_MPIDense(Mat A)
242 {
243   int          ierr;
244   Mat_MPIDense *l = (Mat_MPIDense*)A->data;
245 
246   PetscFunctionBegin;
247   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
248   PetscFunctionReturn(0);
249 }
250 
251 #undef __FUNC__
252 #define __FUNC__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  /* Dense matrices carry no block structure; the block size is always 1. */
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}
259 
/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
266 #undef __FUNC__
267 #define __FUNC__ "MatZeroRows_MPIDense"
268 int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
269 {
270   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
271   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
272   int            *procs,*nprocs,j,idx,nsends,*work;
273   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
274   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
275   int            *lens,imdex,*lrows,*values;
276   MPI_Comm       comm = A->comm;
277   MPI_Request    *send_waits,*recv_waits;
278   MPI_Status     recv_status,*send_status;
279   IS             istmp;
280   PetscTruth     found;
281 
282   PetscFunctionBegin;
283   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
284   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
285 
286   /*  first count number of contributors to each processor */
287   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
288   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
289   procs = nprocs + size;
290   ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
291   for (i=0; i<N; i++) {
292     idx = rows[i];
293     found = PETSC_FALSE;
294     for (j=0; j<size; j++) {
295       if (idx >= owners[j] && idx < owners[j+1]) {
296         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
297       }
298     }
299     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
300   }
301   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
302 
303   /* inform other processors of number of messages and max length*/
304   ierr   = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
305   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
306   nmax   = work[rank];
307   nrecvs = work[size+rank];
308   ierr   = PetscFree(work);CHKERRQ(ierr);
309 
310   /* post receives:   */
311   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
312   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
313   for (i=0; i<nrecvs; i++) {
314     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
315   }
316 
317   /* do sends:
318       1) starts[i] gives the starting index in svalues for stuff going to
319          the ith processor
320   */
321   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
322   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
323   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
324   starts[0]  = 0;
325   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
326   for (i=0; i<N; i++) {
327     svalues[starts[owner[i]]++] = rows[i];
328   }
329   ISRestoreIndices(is,&rows);
330 
331   starts[0] = 0;
332   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
333   count = 0;
334   for (i=0; i<size; i++) {
335     if (procs[i]) {
336       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
337     }
338   }
339   ierr = PetscFree(starts);CHKERRQ(ierr);
340 
341   base = owners[rank];
342 
343   /*  wait on receives */
344   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
345   source = lens + nrecvs;
346   count  = nrecvs; slen = 0;
347   while (count) {
348     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
349     /* unpack receives into our local space */
350     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
351     source[imdex]  = recv_status.MPI_SOURCE;
352     lens[imdex]  = n;
353     slen += n;
354     count--;
355   }
356   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
357 
358   /* move the data into the send scatter */
359   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
360   count = 0;
361   for (i=0; i<nrecvs; i++) {
362     values = rvalues + i*nmax;
363     for (j=0; j<lens[i]; j++) {
364       lrows[count++] = values[j] - base;
365     }
366   }
367   ierr = PetscFree(rvalues);CHKERRQ(ierr);
368   ierr = PetscFree(lens);CHKERRQ(ierr);
369   ierr = PetscFree(owner);CHKERRQ(ierr);
370   ierr = PetscFree(nprocs);CHKERRQ(ierr);
371 
372   /* actually zap the local rows */
373   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
374   PetscLogObjectParent(A,istmp);
375   ierr = PetscFree(lrows);CHKERRQ(ierr);
376   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
377   ierr = ISDestroy(istmp);CHKERRQ(ierr);
378 
379   /* wait on sends */
380   if (nsends) {
381     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
382     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
383     ierr = PetscFree(send_status);CHKERRQ(ierr);
384   }
385   ierr = PetscFree(send_waits);CHKERRQ(ierr);
386   ierr = PetscFree(svalues);CHKERRQ(ierr);
387 
388   PetscFunctionReturn(0);
389 }
390 
391 #undef __FUNC__
392 #define __FUNC__ "MatMult_MPIDense"
393 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
394 {
395   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
396   int          ierr;
397 
398   PetscFunctionBegin;
399   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
400   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
401   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
402   PetscFunctionReturn(0);
403 }
404 
405 #undef __FUNC__
406 #define __FUNC__ "MatMultAdd_MPIDense"
407 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
408 {
409   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
410   int          ierr;
411 
412   PetscFunctionBegin;
413   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
414   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
415   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
416   PetscFunctionReturn(0);
417 }
418 
419 #undef __FUNC__
420 #define __FUNC__ "MatMultTranspose_MPIDense"
421 int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
422 {
423   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
424   int          ierr;
425   Scalar       zero = 0.0;
426 
427   PetscFunctionBegin;
428   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
429   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
430   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
431   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
432   PetscFunctionReturn(0);
433 }
434 
435 #undef __FUNC__
436 #define __FUNC__ "MatMultTransposeAdd_MPIDense"
437 int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
438 {
439   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
440   int          ierr;
441 
442   PetscFunctionBegin;
443   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
444   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
445   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
446   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
447   PetscFunctionReturn(0);
448 }
449 
450 #undef __FUNC__
451 #define __FUNC__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  /* Place the diagonal of A into v.  v's global size must equal A's
     global row dimension; entries beyond the locally stored diagonal
     segment are left at zero. */
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
  int          ierr,len,i,n,m = A->m,radd;
  Scalar       *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  /* number of diagonal entries held in the local block */
  len  = PetscMin(a->A->m,a->A->n);
  /* offset of the first local diagonal entry: column rstart of the
     column-major local storage, whose stride (from the i*m term below)
     is m = A->m */
  radd = a->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
472 
473 #undef __FUNC__
474 #define __FUNC__ "MatDestroy_MPIDense"
475 int MatDestroy_MPIDense(Mat mat)
476 {
477   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
478   int          ierr;
479 
480   PetscFunctionBegin;
481 
482 #if defined(PETSC_USE_LOG)
483   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
484 #endif
485   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
486   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
487   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
488   if (mdn->lvec)   VecDestroy(mdn->lvec);
489   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
490   if (mdn->factor) {
491     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
492     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
493     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
494     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
495   }
496   ierr = PetscFree(mdn);CHKERRQ(ierr);
497   PetscFunctionReturn(0);
498 }
499 
500 #undef __FUNC__
501 #define __FUNC__ "MatView_MPIDense_Binary"
502 static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
503 {
504   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
505   int          ierr;
506 
507   PetscFunctionBegin;
508   if (mdn->size == 1) {
509     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
510   }
511   else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
512   PetscFunctionReturn(0);
513 }
514 
515 #undef __FUNC__
516 #define __FUNC__ "MatView_MPIDense_ASCIIorDraworSocket"
static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  /* View the parallel dense matrix on an ASCII, draw, or socket viewer.
     Info formats print per-process statistics only; every other format
     gathers the whole matrix onto the first process and views it there. */
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  int               ierr,size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscTruth        isascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
      /* per-process storage statistics plus the scatter context */
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
                   (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      /* nothing extra to print for the short info format */
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw       draw;
    PetscTruth isnull;

    /* a null draw context means graphics are disabled -- nothing to do */
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    int          M = mat->M,N = mat->N,m,row,i,nz,*cols;
    Scalar       *vals;

    /* rank 0 owns all M rows of the gather target; the others own none */
    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = mdn->A->m;
    for (i=0; i<m; i++) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* view rank 0's local block through a singleton (sequential) viewer */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
589 
590 #undef __FUNC__
591 #define __FUNC__ "MatView_MPIDense"
592 int MatView_MPIDense(Mat mat,PetscViewer viewer)
593 {
594   int        ierr;
595   PetscTruth isascii,isbinary,isdraw,issocket;
596 
597   PetscFunctionBegin;
598 
599   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
600   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
601   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
602   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
603 
604   if (isascii || issocket || isdraw) {
605     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
606   } else if (isbinary) {
607     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
608   } else {
609     SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
610   }
611   PetscFunctionReturn(0);
612 }
613 
614 #undef __FUNC__
615 #define __FUNC__ "MatGetInfo_MPIDense"
616 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
617 {
618   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
619   Mat          mdn = mat->A;
620   int          ierr;
621   PetscReal    isend[5],irecv[5];
622 
623   PetscFunctionBegin;
624   info->rows_global    = (double)A->M;
625   info->columns_global = (double)A->N;
626   info->rows_local     = (double)A->m;
627   info->columns_local  = (double)A->N;
628   info->block_size     = 1.0;
629   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
630   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
631   isend[3] = info->memory;  isend[4] = info->mallocs;
632   if (flag == MAT_LOCAL) {
633     info->nz_used      = isend[0];
634     info->nz_allocated = isend[1];
635     info->nz_unneeded  = isend[2];
636     info->memory       = isend[3];
637     info->mallocs      = isend[4];
638   } else if (flag == MAT_GLOBAL_MAX) {
639     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
640     info->nz_used      = irecv[0];
641     info->nz_allocated = irecv[1];
642     info->nz_unneeded  = irecv[2];
643     info->memory       = irecv[3];
644     info->mallocs      = irecv[4];
645   } else if (flag == MAT_GLOBAL_SUM) {
646     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
647     info->nz_used      = irecv[0];
648     info->nz_allocated = irecv[1];
649     info->nz_unneeded  = irecv[2];
650     info->memory       = irecv[3];
651     info->mallocs      = irecv[4];
652   }
653   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
654   info->fill_ratio_needed = 0;
655   info->factor_mallocs    = 0;
656   PetscFunctionReturn(0);
657 }
658 
659 #undef __FUNC__
660 #define __FUNC__ "MatSetOption_MPIDense"
661 int MatSetOption_MPIDense(Mat A,MatOption op)
662 {
663   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
664   int          ierr;
665 
666   PetscFunctionBegin;
667   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
668       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
669       op == MAT_NEW_NONZERO_LOCATION_ERR ||
670       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
671       op == MAT_COLUMNS_SORTED ||
672       op == MAT_COLUMNS_UNSORTED) {
673         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
674   } else if (op == MAT_ROW_ORIENTED) {
675         a->roworiented = PETSC_TRUE;
676         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
677   } else if (op == MAT_ROWS_SORTED ||
678              op == MAT_ROWS_UNSORTED ||
679              op == MAT_SYMMETRIC ||
680              op == MAT_STRUCTURALLY_SYMMETRIC ||
681              op == MAT_YES_NEW_DIAGONALS ||
682              op == MAT_USE_HASH_TABLE) {
683     PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
684   } else if (op == MAT_COLUMN_ORIENTED) {
685     a->roworiented = PETSC_FALSE;
686     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
687   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
688     a->donotstash = PETSC_TRUE;
689   } else if (op == MAT_NO_NEW_DIAGONALS) {
690     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
691   } else {
692     SETERRQ(PETSC_ERR_SUP,"unknown option");
693   }
694   PetscFunctionReturn(0);
695 }
696 
697 #undef __FUNC__
698 #define __FUNC__ "MatGetOwnershipRange_MPIDense"
699 int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
700 {
701   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
702 
703   PetscFunctionBegin;
704   if (m) *m = mat->rstart;
705   if (n) *n = mat->rend;
706   PetscFunctionReturn(0);
707 }
708 
709 #undef __FUNC__
710 #define __FUNC__ "MatGetRow_MPIDense"
711 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
712 {
713   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
714   int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;
715 
716   PetscFunctionBegin;
717   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows")
718   lrow = row - rstart;
719   ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
720   PetscFunctionReturn(0);
721 }
722 
723 #undef __FUNC__
724 #define __FUNC__ "MatRestoreRow_MPIDense"
725 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
726 {
727   int ierr;
728 
729   PetscFunctionBegin;
730   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
731   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
732   PetscFunctionReturn(0);
733 }
734 
735 #undef __FUNC__
736 #define __FUNC__ "MatDiagonalScale_MPIDense"
int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  /* Scale A in place: A = diag(ll) * A * diag(rr).  Either vector may be
     PETSC_NULL to skip that side. */
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  Scalar       *l,*r,x,*v;
  int          ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    /* scale local row i by l[i]; storage is column-major with stride m */
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    PetscLogFlops(n*m);
  }
  if (rr) {
    /* NOTE(review): this compares the *global* size of rr against s3 while
       the message says "local size" (the ll branch uses VecGetLocalSize);
       confirm which comparison is intended */
    ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
    /* gather the needed entries of rr into the local work vector */
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    /* scale local column i by r[i] */
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    PetscLogFlops(n*m);
  }
  PetscFunctionReturn(0);
}
774 
775 #undef __FUNC__
776 #define __FUNC__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,PetscReal *norm)
{
  /* Matrix norms.  Frobenius: sqrt of the global sum of squared entries.
     1-norm: maximum column sum, with columns reduced across ranks.
     Infinity-norm: maximum over ranks of the local row-sum maxima.
     The 2-norm is not supported. */
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  int          ierr,i,j;
  PetscReal    sum = 0.0;
  Scalar       *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    /* single process: the local block is the whole matrix */
    ierr =  MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->n*mdn->A->m; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      /* NOTE(review): the reductions here assume PetscReal is double
         (MPI_DOUBLE) -- confirm for single-precision builds */
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
      PetscLogFlops(2*mdn->A->n*mdn->A->m);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->N;
      ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = mat->v;
      /* accumulate local column sums (local block is column-major) */
      for (j=0; j<mdn->A->n; j++) {
        for (i=0; i<mdn->A->m; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      /* sum the column sums across ranks, then take the maximum */
      ierr = MPI_Allreduce(tmp,tmp2,A->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      for (j=0; j<A->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      PetscLogFlops(A->n*A->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
828 
829 #undef __FUNC__
830 #define __FUNC__ "MatTranspose_MPIDense"
831 int MatTranspose_MPIDense(Mat A,Mat *matout)
832 {
833   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
834   Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
835   Mat          B;
836   int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
837   int          j,i,ierr;
838   Scalar       *v;
839 
840   PetscFunctionBegin;
841   if (!matout && M != N) {
842     SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
843   }
844   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);
845 
846   m = a->A->m; n = a->A->n; v = Aloc->v;
847   ierr = PetscMalloc(n*sizeof(int),&rwork);CHKERRQ(ierr);
848   for (j=0; j<n; j++) {
849     for (i=0; i<m; i++) rwork[i] = rstart + i;
850     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
851     v   += m;
852   }
853   ierr = PetscFree(rwork);CHKERRQ(ierr);
854   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
855   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
856   if (matout) {
857     *matout = B;
858   } else {
859     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
860   }
861   PetscFunctionReturn(0);
862 }
863 
864 #include "petscblaslapack.h"
865 #undef __FUNC__
866 #define __FUNC__ "MatScale_MPIDense"
867 int MatScale_MPIDense(Scalar *alpha,Mat inA)
868 {
869   Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
870   Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
871   int          one = 1,nz;
872 
873   PetscFunctionBegin;
874   nz = inA->m*inA->N;
875   BLscal_(&nz,alpha,a->v,&one);
876   PetscLogFlops(nz);
877   PetscFunctionReturn(0);
878 }
879 
880 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
881 EXTERN int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);
882 
883 #undef __FUNC__
884 #define __FUNC__ "MatSetUpPreallocation_MPIDense"
885 int MatSetUpPreallocation_MPIDense(Mat A)
886 {
887   int        ierr;
888 
889   PetscFunctionBegin;
890   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
891   PetscFunctionReturn(0);
892 }
893 
894 /* -------------------------------------------------------------------*/
895 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
896        MatGetRow_MPIDense,
897        MatRestoreRow_MPIDense,
898        MatMult_MPIDense,
899        MatMultAdd_MPIDense,
900        MatMultTranspose_MPIDense,
901        MatMultTransposeAdd_MPIDense,
902        0,
903        0,
904        0,
905        0,
906        0,
907        0,
908        0,
909        MatTranspose_MPIDense,
910        MatGetInfo_MPIDense,0,
911        MatGetDiagonal_MPIDense,
912        MatDiagonalScale_MPIDense,
913        MatNorm_MPIDense,
914        MatAssemblyBegin_MPIDense,
915        MatAssemblyEnd_MPIDense,
916        0,
917        MatSetOption_MPIDense,
918        MatZeroEntries_MPIDense,
919        MatZeroRows_MPIDense,
920        0,
921        0,
922        0,
923        0,
924        MatSetUpPreallocation_MPIDense,
925        0,
926        MatGetOwnershipRange_MPIDense,
927        0,
928        0,
929        MatGetArray_MPIDense,
930        MatRestoreArray_MPIDense,
931        MatDuplicate_MPIDense,
932        0,
933        0,
934        0,
935        0,
936        0,
937        MatGetSubMatrices_MPIDense,
938        0,
939        MatGetValues_MPIDense,
940        0,
941        0,
942        MatScale_MPIDense,
943        0,
944        0,
945        0,
946        MatGetBlockSize_MPIDense,
947        0,
948        0,
949        0,
950        0,
951        0,
952        0,
953        0,
954        0,
955        0,
956        MatGetSubMatrix_MPIDense,
957        MatDestroy_MPIDense,
958        MatView_MPIDense,
959        MatGetMaps_Petsc};
960 
EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatCreate_MPIDense"
/*
   MatCreate_MPIDense - Constructor registered for the MATMPIDENSE type.

   Installs the MPIDense operation table, splits ownership of the global
   rows/columns across the communicator, builds the row and column
   ownership tables, and creates the stash used to cache off-process
   entries set with MatSetValues().  The local storage itself is allocated
   later, by MatMPIDenseSetPreallocation().
*/
int MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;
  int          ierr,i;

  PetscFunctionBegin;
  ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data         = (void*)a;
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = 0;
  mat->mapping      = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);

  /* fill in any of m/M, n/N that were left as PETSC_DECIDE */
  ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
  a->nvec = mat->n;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships: one allocation holds
     both tables, with cowners starting at offset size+1 into rowners */
  ierr       = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
  a->cowners = a->rowners + a->size + 1;
  PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  /* gather every process's local row count into slots 1..size, then turn
     the counts into a prefix sum so rowners[p]..rowners[p+1] is process
     p's global row range */
  ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  /* same construction for the column ownership table */
  ierr      = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->cowners[i] += a->cowners[i-1];
  }

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  /* expose MatGetDiagonalBlock_MPIDense through the dynamic-dispatch table */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIDense",
                                     MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
1022 
1023 #undef __FUNC__
1024 #define __FUNC__ "MatMPIDenseSetPreallocation"
1025 /*@C
1026    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1027 
1028    Not collective
1029 
1030    Input Parameters:
1031 .  A - the matrix
1032 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1033    to control all matrix memory allocation.
1034 
1035    Notes:
1036    The dense format is fully compatible with standard Fortran 77
1037    storage by columns.
1038 
1039    The data input variable is intended primarily for Fortran programmers
1040    who wish to allocate their own matrix memory space.  Most users should
1041    set data=PETSC_NULL.
1042 
1043    Level: intermediate
1044 
1045 .keywords: matrix,dense, parallel
1046 
1047 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1048 @*/
1049 int MatMPIDenseSetPreallocation(Mat mat,Scalar *data)
1050 {
1051   Mat_MPIDense *a;
1052   int          ierr;
1053   PetscTruth   flg2;
1054 
1055   PetscFunctionBegin;
1056   ierr = PetscTypeCompare((PetscObject)mat,MATMPIDENSE,&flg2);CHKERRQ(ierr);
1057   if (!flg2) PetscFunctionReturn(0);
1058   mat->preallocated = PETSC_TRUE;
1059   /* Note:  For now, when data is specified above, this assumes the user correctly
1060    allocates the local dense storage space.  We should add error checking. */
1061 
1062   a    = (Mat_MPIDense*)mat->data;
1063   ierr = MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);CHKERRQ(ierr);
1064   PetscLogObjectParent(mat,a->A);
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 #undef __FUNC__
1069 #define __FUNC__ "MatCreateMPIDense"
1070 /*@C
1071    MatCreateMPIDense - Creates a sparse parallel matrix in dense format.
1072 
1073    Collective on MPI_Comm
1074 
1075    Input Parameters:
1076 +  comm - MPI communicator
1077 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1078 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1079 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1080 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1081 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1082    to control all matrix memory allocation.
1083 
1084    Output Parameter:
1085 .  A - the matrix
1086 
1087    Notes:
1088    The dense format is fully compatible with standard Fortran 77
1089    storage by columns.
1090 
1091    The data input variable is intended primarily for Fortran programmers
1092    who wish to allocate their own matrix memory space.  Most users should
1093    set data=PETSC_NULL.
1094 
1095    The user MUST specify either the local or global matrix dimensions
1096    (possibly both).
1097 
1098    Level: intermediate
1099 
1100 .keywords: matrix,dense, parallel
1101 
1102 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1103 @*/
1104 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
1105 {
1106   int ierr,size;
1107 
1108   PetscFunctionBegin;
1109   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1110   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1111   if (size > 1) {
1112     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1113     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1114   } else {
1115     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1116     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1117   }
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 #undef __FUNC__
1122 #define __FUNC__ "MatDuplicate_MPIDense"
1123 static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1124 {
1125   Mat          mat;
1126   Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1127   int          ierr;
1128 
1129   PetscFunctionBegin;
1130   *newmat       = 0;
1131   ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
1132   ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr);
1133   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1134   mat->data         = (void*)a;
1135   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1136   mat->factor       = A->factor;
1137   mat->assembled    = PETSC_TRUE;
1138   mat->preallocated = PETSC_TRUE;
1139 
1140   a->rstart       = oldmat->rstart;
1141   a->rend         = oldmat->rend;
1142   a->size         = oldmat->size;
1143   a->rank         = oldmat->rank;
1144   mat->insertmode = NOT_SET_VALUES;
1145   a->nvec         = oldmat->nvec;
1146   a->donotstash   = oldmat->donotstash;
1147   ierr            = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);CHKERRQ(ierr);
1148   PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1149   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
1150   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1151 
1152   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1153   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1154   PetscLogObjectParent(mat,a->A);
1155   *newmat = mat;
1156   PetscFunctionReturn(0);
1157 }
1158 
1159 #include "petscsys.h"
1160 
1161 #undef __FUNC__
1162 #define __FUNC__ "MatLoad_MPIDense_DenseInFile"
1163 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
1164 {
1165   int        *rowners,i,size,rank,m,ierr,nz,j;
1166   Scalar     *array,*vals,*vals_ptr;
1167   MPI_Status status;
1168 
1169   PetscFunctionBegin;
1170   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1171   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1172 
1173   /* determine ownership of all rows */
1174   m          = M/size + ((M % size) > rank);
1175   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1176   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1177   rowners[0] = 0;
1178   for (i=2; i<=size; i++) {
1179     rowners[i] += rowners[i-1];
1180   }
1181 
1182   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1183   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1184 
1185   if (!rank) {
1186     ierr = PetscMalloc(m*N*sizeof(Scalar),&vals);CHKERRQ(ierr);
1187 
1188     /* read in my part of the matrix numerical values  */
1189     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1190 
1191     /* insert into matrix-by row (this is why cannot directly read into array */
1192     vals_ptr = vals;
1193     for (i=0; i<m; i++) {
1194       for (j=0; j<N; j++) {
1195         array[i + j*m] = *vals_ptr++;
1196       }
1197     }
1198 
1199     /* read in other processors and ship out */
1200     for (i=1; i<size; i++) {
1201       nz   = (rowners[i+1] - rowners[i])*N;
1202       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1203       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1204     }
1205   } else {
1206     /* receive numeric values */
1207     ierr = PetscMalloc(m*N*sizeof(Scalar),&vals);CHKERRQ(ierr);
1208 
1209     /* receive message of values*/
1210     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1211 
1212     /* insert into matrix-by row (this is why cannot directly read into array */
1213     vals_ptr = vals;
1214     for (i=0; i<m; i++) {
1215       for (j=0; j<N; j++) {
1216         array[i + j*m] = *vals_ptr++;
1217       }
1218     }
1219   }
1220   ierr = PetscFree(rowners);CHKERRQ(ierr);
1221   ierr = PetscFree(vals);CHKERRQ(ierr);
1222   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1223   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1224   PetscFunctionReturn(0);
1225 }
1226 
EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense"
/*
   MatLoad_MPIDense - Loads a parallel dense matrix from a PETSc binary
   viewer.

   Process 0 reads the header, row lengths, column indices and numerical
   values, keeps its own share and ships the rest to the other processes
   with point-to-point messages.  When the nonzero count in the header is
   MATRIX_BINARY_FORMAT_DENSE the file holds a dense array instead of
   AIJ-style data and the work is delegated to
   MatLoad_MPIDense_DenseInFile().
*/
int MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;
  int          i,nz,ierr,j,rstart,rend,fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  /* only process 0 touches the file; it reads and validates the header
     header = {cookie, M, N, nz} */
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  /* NOTE(review): fd is only assigned on process 0 above; on the other
     processes an indeterminate value is passed here.  The callee only uses
     fd on process 0, but this should be tidied up -- confirm. */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows: M/size rows per process plus one of
     the M%size leftovers, turned into a prefix-sum table */
  m          = M/size + ((M % size) > rank);
  ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors; ourlens and offlens share
     one allocation of 2*(rend-rstart) ints */
  ierr    = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    /* non-root ranks: send parameters of MPI_Scatterv are ignored */
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  /* NOTE(review): the subtract/re-add of offlens around the creation looks
     vestigial (inherited from the sparse loader, where the split lengths
     feed preallocation); it has no net effect here -- confirm. */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(Scalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix: one MatSetValues call per local row, walking the
       column-index and value buffers in step */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(Scalar),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
1404 EXTERN_C_END
1405 
1406 
1407 
1408 
1409 
1410