xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 60625e5193e1faa2b4b90582eb81189a9b39b613)
1 /*$Id: mpidense.c,v 1.150 2001/01/20 03:34:41 bsmith Exp balay $*/
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 #include "src/mat/impls/dense/mpi/mpidense.h"
8 #include "src/vec/vecimpl.h"
9 
10 EXTERN_C_BEGIN
11 #undef __FUNC__
12 #define __FUNC__ "MatGetDiagonalBlock_MPIDense"
13 int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
14 {
15   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
16   int          m = A->m,rstart = mdn->rstart,ierr;
17   Scalar       *array;
18   MPI_Comm     comm;
19 
20   PetscFunctionBegin;
21   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
22 
23   /* The reuse aspect is not implemented efficiently */
24   if (reuse == MAT_REUSE_MATRIX) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
25 
26   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
27   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
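  /* the local block mdn->A is m x N stored column-major, so the m x m diagonal block
     occupies columns rstart..rstart+m-1 and begins at offset m*rstart in the array */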
28   ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
29   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
30   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
31   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
32 
33   *iscopy = PETSC_TRUE;
34   PetscFunctionReturn(0);
35 }
36 EXTERN_C_END
37 
38 EXTERN int MatSetUpMultiply_MPIDense(Mat);
39 
40 #undef __FUNC__
41 #define __FUNC__ "MatSetValues_MPIDense"
42 int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
43 {
44   Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
45   int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
46   PetscTruth   roworiented = A->roworiented;
47 
48   PetscFunctionBegin;
49   for (i=0; i<m; i++) {
50     if (idxm[i] < 0) continue;
51     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
52     if (idxm[i] >= rstart && idxm[i] < rend) {
53       row = idxm[i] - rstart;
54       if (roworiented) {
55         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
56       } else {
57         for (j=0; j<n; j++) {
58           if (idxn[j] < 0) continue;
59           if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
60           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
61         }
62       }
63     } else {
64       if (!A->donotstash) {
65         if (roworiented) {
66           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
67         } else {
68           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
69         }
70       }
71     }
72   }
73   PetscFunctionReturn(0);
74 }
75 
76 #undef __FUNC__
77 #define __FUNC__ "MatGetValues_MPIDense"
78 int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
79 {
80   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
81   int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;
82 
83   PetscFunctionBegin;
84   for (i=0; i<m; i++) {
85     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
86     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
87     if (idxm[i] >= rstart && idxm[i] < rend) {
88       row = idxm[i] - rstart;
89       for (j=0; j<n; j++) {
90         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
91         if (idxn[j] >= mat->N) {
92           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
93         }
94         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
95       }
96     } else {
97       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
98     }
99   }
100   PetscFunctionReturn(0);
101 }
102 
103 #undef __FUNC__
104 #define __FUNC__ "MatGetArray_MPIDense"
105 int MatGetArray_MPIDense(Mat A,Scalar **array)
106 {
107   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
108   int          ierr;
109 
110   PetscFunctionBegin;
111   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
112   PetscFunctionReturn(0);
113 }
114 
115 #undef __FUNC__
116 #define __FUNC__ "MatGetSubMatrix_MPIDense"
117 static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
118 {
119   Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
120   Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
121   int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
122   Scalar       *av,*bv,*v = lmat->v;
123   Mat          newmat;
124 
125   PetscFunctionBegin;
126   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
127   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
128   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
129   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
130 
131   /* No parallel redistribution currently supported! Should really check each index set
132      to confirm that it is OK.  ... Currently supports only submatrices with the same
133      partitioning as the original matrix! */
134 
135   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
136   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
137 
138   /* Check submatrix call */
139   if (scall == MAT_REUSE_MATRIX) {
140     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
141     /* Really need to check the row and column sizes! */
142     newmat = *B;
143   } else {
144     /* Create and fill new matrix */
145     ierr = MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);CHKERRQ(ierr);
146   }
147 
148   /* Now extract the data pointers and do the copy, column at a time */
149   newmatd = (Mat_MPIDense*)newmat->data;
150   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
151 
152   for (i=0; i<ncols; i++) {
153     av = v + nlrows*icol[i];
154     for (j=0; j<nrows; j++) {
155       *bv++ = av[irow[j] - rstart];
156     }
157   }
158 
159   /* Assemble the matrices so that the correct flags are set */
160   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
161   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
162 
163   /* Free work space */
164   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
165   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
166   *B = newmat;
167   PetscFunctionReturn(0);
168 }
169 
170 #undef __FUNC__
171 #define __FUNC__ "MatRestoreArray_MPIDense"
172 int MatRestoreArray_MPIDense(Mat A,Scalar **array)
173 {
174   PetscFunctionBegin;
175   PetscFunctionReturn(0);
176 }
177 
178 #undef __FUNC__
179 #define __FUNC__ "MatAssemblyBegin_MPIDense"
180 int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
181 {
182   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
183   MPI_Comm     comm = mat->comm;
184   int          ierr,nstash,reallocs;
185   InsertMode   addv;
186 
187   PetscFunctionBegin;
188   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
189   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
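  /* a bitwise OR of the local modes yields (ADD_VALUES|INSERT_VALUES) only when some
     processors inserted while others added, which the check below rejects */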
190   if (addv == (ADD_VALUES|INSERT_VALUES)) {
191     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
192   }
193   mat->insertmode = addv; /* in case this processor had no cache */
194 
195   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
196   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
197   PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
198   PetscFunctionReturn(0);
199 }
200 
201 #undef __FUNC__
202 #define __FUNC__ "MatAssemblyEnd_MPIDense"
203 int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
204 {
205   Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
206   int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
207   Scalar       *val;
208   InsertMode   addv=mat->insertmode;
209 
210   PetscFunctionBegin;
211   /*  wait on receives */
212   while (1) {
213     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
214     if (!flg) break;
215 
216     for (i=0; i<n;) {
217       /* Now identify the consecutive vals belonging to the same row */
218       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
219       if (j < n) ncols = j-i;
220       else       ncols = n-i;
221       /* Now assemble all these values with a single function call */
222       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
223       i = j;
224     }
225   }
226   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
227 
228   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
229   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
230 
231   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
232     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
233   }
234   PetscFunctionReturn(0);
235 }
236 
237 #undef __FUNC__
238 #define __FUNC__ "MatZeroEntries_MPIDense"
239 int MatZeroEntries_MPIDense(Mat A)
240 {
241   int          ierr;
242   Mat_MPIDense *l = (Mat_MPIDense*)A->data;
243 
244   PetscFunctionBegin;
245   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
246   PetscFunctionReturn(0);
247 }
248 
249 #undef __FUNC__
250 #define __FUNC__ "MatGetBlockSize_MPIDense"
251 int MatGetBlockSize_MPIDense(Mat A,int *bs)
252 {
253   PetscFunctionBegin;
254   *bs = 1;
255   PetscFunctionReturn(0);
256 }
257 
258 /* the code does not handle the diagonal entries correctly unless the
259    matrix is square and the row and column ownerships are identical.
260    This is a BUG. The only way to fix it seems to be to access
261    mdn->A and mdn->B directly and not through the MatZeroRows()
262    routine.
263 */
264 #undef __FUNC__
265 #define __FUNC__ "MatZeroRows_MPIDense"
266 int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
267 {
268   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
269   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
270   int            *procs,*nprocs,j,idx,nsends,*work;
271   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
272   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
273   int            *lens,imdex,*lrows,*values;
274   MPI_Comm       comm = A->comm;
275   MPI_Request    *send_waits,*recv_waits;
276   MPI_Status     recv_status,*send_status;
277   IS             istmp;
278   PetscTruth     found;
279 
280   PetscFunctionBegin;
281   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
282   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
283 
284   /*  first count number of contributors to each processor */
285   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
286   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
287   procs = nprocs + size;
288   ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
289   for (i=0; i<N; i++) {
290     idx = rows[i];
291     found = PETSC_FALSE;
292     for (j=0; j<size; j++) {
293       if (idx >= owners[j] && idx < owners[j+1]) {
294         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
295       }
296     }
297     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
298   }
299   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
300 
301   /* inform other processors of number of messages and max length*/
302   ierr   = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
303   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
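  /* after this reduction work[rank] holds the longest message this process will receive
     (nmax) and work[size+rank] holds how many messages it will receive (nrecvs) */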
304   nmax   = work[rank];
305   nrecvs = work[size+rank];
306   ierr   = PetscFree(work);CHKERRQ(ierr);
307 
308   /* post receives:   */
309   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
310   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
311   for (i=0; i<nrecvs; i++) {
312     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
313   }
314 
315   /* do sends:
316       1) starts[i] gives the starting index in svalues for stuff going to
317          the ith processor
318   */
319   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
320   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
321   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
322   starts[0]  = 0;
323   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
324   for (i=0; i<N; i++) {
325     svalues[starts[owner[i]]++] = rows[i];
326   }
327   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
328 
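  /* the packing loop above advanced starts[], so rebuild the offsets before posting the sends */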
329   starts[0] = 0;
330   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
331   count = 0;
332   for (i=0; i<size; i++) {
333     if (procs[i]) {
334       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
335     }
336   }
337   ierr = PetscFree(starts);CHKERRQ(ierr);
338 
339   base = owners[rank];
340 
341   /*  wait on receives */
342   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
343   source = lens + nrecvs;
344   count  = nrecvs; slen = 0;
345   while (count) {
346     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
347     /* unpack receives into our local space */
348     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
349     source[imdex]  = recv_status.MPI_SOURCE;
350     lens[imdex]  = n;
351     slen += n;
352     count--;
353   }
354   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
355 
356   /* move the data into the send scatter */
357   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
358   count = 0;
359   for (i=0; i<nrecvs; i++) {
360     values = rvalues + i*nmax;
361     for (j=0; j<lens[i]; j++) {
362       lrows[count++] = values[j] - base;
363     }
364   }
365   ierr = PetscFree(rvalues);CHKERRQ(ierr);
366   ierr = PetscFree(lens);CHKERRQ(ierr);
367   ierr = PetscFree(owner);CHKERRQ(ierr);
368   ierr = PetscFree(nprocs);CHKERRQ(ierr);
369 
370   /* actually zap the local rows */
371   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
372   PetscLogObjectParent(A,istmp);
373   ierr = PetscFree(lrows);CHKERRQ(ierr);
374   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
375   ierr = ISDestroy(istmp);CHKERRQ(ierr);
376 
377   /* wait on sends */
378   if (nsends) {
379     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
380     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
381     ierr = PetscFree(send_status);CHKERRQ(ierr);
382   }
383   ierr = PetscFree(send_waits);CHKERRQ(ierr);
384   ierr = PetscFree(svalues);CHKERRQ(ierr);
385 
386   PetscFunctionReturn(0);
387 }
388 
389 #undef __FUNC__
390 #define __FUNC__ "MatMult_MPIDense"
391 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
392 {
393   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
394   int          ierr;
395 
396   PetscFunctionBegin;
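  /* gather all N entries of xx into the local work vector lvec, then multiply by the local m x N block */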
397   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
398   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
399   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
400   PetscFunctionReturn(0);
401 }
402 
403 #undef __FUNC__
404 #define __FUNC__ "MatMultAdd_MPIDense"
405 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
406 {
407   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
408   int          ierr;
409 
410   PetscFunctionBegin;
411   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
412   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
413   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
414   PetscFunctionReturn(0);
415 }
416 
417 #undef __FUNC__
418 #define __FUNC__ "MatMultTranspose_MPIDense"
419 int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
420 {
421   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
422   int          ierr;
423   Scalar       zero = 0.0;
424 
425   PetscFunctionBegin;
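  /* zero yy, form the local product A_loc^T x_loc (length N) in lvec, then accumulate the
     contributions from all processes into yy with a reverse ADD_VALUES scatter */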
426   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
427   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
428   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
429   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
430   PetscFunctionReturn(0);
431 }
432 
433 #undef __FUNC__
434 #define __FUNC__ "MatMultTransposeAdd_MPIDense"
435 int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
436 {
437   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
438   int          ierr;
439 
440   PetscFunctionBegin;
441   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
442   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
443   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
444   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
445   PetscFunctionReturn(0);
446 }
447 
448 #undef __FUNC__
449 #define __FUNC__ "MatGetDiagonal_MPIDense"
450 int MatGetDiagonal_MPIDense(Mat A,Vec v)
451 {
452   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
453   Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
454   int          ierr,len,i,n,m = A->m,radd;
455   Scalar       *x,zero = 0.0;
456 
457   PetscFunctionBegin;
458   ierr = VecSet(&zero,v);CHKERRQ(ierr);
459   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
460   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
461   if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
462   len  = PetscMin(a->A->m,a->A->n);
463   radd = a->rstart*m;
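  /* the local block is column-major m x N; the diagonal entry of local row i lies in global
     column rstart+i, i.e. at offset radd + i*m + i */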
464   for (i=0; i<len; i++) {
465     x[i] = aloc->v[radd + i*m + i];
466   }
467   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
468   PetscFunctionReturn(0);
469 }
470 
471 #undef __FUNC__
472 #define __FUNC__ "MatDestroy_MPIDense"
473 int MatDestroy_MPIDense(Mat mat)
474 {
475   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
476   int          ierr;
477 
478   PetscFunctionBegin;
479 
480 #if defined(PETSC_USE_LOG)
481   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
482 #endif
483   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
484   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
485   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
486   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
487   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
488   if (mdn->factor) {
489     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
490     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
491     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
492     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
493   }
494   ierr = PetscFree(mdn);CHKERRQ(ierr);
495   PetscFunctionReturn(0);
496 }
497 
498 #undef __FUNC__
499 #define __FUNC__ "MatView_MPIDense_Binary"
500 static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
501 {
502   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
503   int          ierr;
504 
505   PetscFunctionBegin;
506   if (mdn->size == 1) {
507     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
508   }
509   else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
510   PetscFunctionReturn(0);
511 }
512 
513 #undef __FUNC__
514 #define __FUNC__ "MatView_MPIDense_ASCIIorDraworSocket"
515 static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
516 {
517   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
518   int               ierr,size = mdn->size,rank = mdn->rank;
519   PetscViewerType   vtype;
520   PetscTruth        isascii,isdraw;
521   PetscViewer       sviewer;
522   PetscViewerFormat format;
523 
524   PetscFunctionBegin;
525   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
526   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
527   if (isascii) {
528     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
529     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
530     if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
531       MatInfo info;
532       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
533       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
534                    (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
535       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
536       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
537       PetscFunctionReturn(0);
538     } else if (format == PETSC_VIEWER_ASCII_INFO) {
539       PetscFunctionReturn(0);
540     }
541   } else if (isdraw) {
542     PetscDraw       draw;
543     PetscTruth isnull;
544 
545     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
546     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
547     if (isnull) PetscFunctionReturn(0);
548   }
549 
550   if (size == 1) {
551     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
552   } else {
553     /* assemble the entire matrix onto first processor. */
554     Mat          A;
555     int          M = mat->M,N = mat->N,m,row,i,nz,*cols;
556     Scalar       *vals;
557 
558     if (!rank) {
559       ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
560     } else {
561       ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
562     }
563     PetscLogObjectParent(mat,A);
564 
565     /* Copy the matrix ... This isn't the most efficient means,
566        but it's quick for now */
567     row = mdn->rstart; m = mdn->A->m;
568     for (i=0; i<m; i++) {
569       ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
570       ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
571       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
572       row++;
573     }
574 
575     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
576     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
577     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
578     if (!rank) {
579       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
580     }
581     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
582     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
583     ierr = MatDestroy(A);CHKERRQ(ierr);
584   }
585   PetscFunctionReturn(0);
586 }
587 
588 #undef __FUNC__
589 #define __FUNC__ "MatView_MPIDense"
590 int MatView_MPIDense(Mat mat,PetscViewer viewer)
591 {
592   int        ierr;
593   PetscTruth isascii,isbinary,isdraw,issocket;
594 
595   PetscFunctionBegin;
596 
597   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
598   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
599   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
600   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
601 
602   if (isascii || issocket || isdraw) {
603     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
604   } else if (isbinary) {
605     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
606   } else {
607     SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
608   }
609   PetscFunctionReturn(0);
610 }
611 
612 #undef __FUNC__
613 #define __FUNC__ "MatGetInfo_MPIDense"
614 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
615 {
616   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
617   Mat          mdn = mat->A;
618   int          ierr;
619   PetscReal    isend[5],irecv[5];
620 
621   PetscFunctionBegin;
622   info->rows_global    = (double)A->M;
623   info->columns_global = (double)A->N;
624   info->rows_local     = (double)A->m;
625   info->columns_local  = (double)A->N;
626   info->block_size     = 1.0;
627   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
628   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
629   isend[3] = info->memory;  isend[4] = info->mallocs;
630   if (flag == MAT_LOCAL) {
631     info->nz_used      = isend[0];
632     info->nz_allocated = isend[1];
633     info->nz_unneeded  = isend[2];
634     info->memory       = isend[3];
635     info->mallocs      = isend[4];
636   } else if (flag == MAT_GLOBAL_MAX) {
637     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
638     info->nz_used      = irecv[0];
639     info->nz_allocated = irecv[1];
640     info->nz_unneeded  = irecv[2];
641     info->memory       = irecv[3];
642     info->mallocs      = irecv[4];
643   } else if (flag == MAT_GLOBAL_SUM) {
644     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
645     info->nz_used      = irecv[0];
646     info->nz_allocated = irecv[1];
647     info->nz_unneeded  = irecv[2];
648     info->memory       = irecv[3];
649     info->mallocs      = irecv[4];
650   }
651   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
652   info->fill_ratio_needed = 0;
653   info->factor_mallocs    = 0;
654   PetscFunctionReturn(0);
655 }
656 
657 #undef __FUNC__
658 #define __FUNC__ "MatSetOption_MPIDense"
659 int MatSetOption_MPIDense(Mat A,MatOption op)
660 {
661   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
662   int          ierr;
663 
664   PetscFunctionBegin;
665   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
666       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
667       op == MAT_NEW_NONZERO_LOCATION_ERR ||
668       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
669       op == MAT_COLUMNS_SORTED ||
670       op == MAT_COLUMNS_UNSORTED) {
671         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
672   } else if (op == MAT_ROW_ORIENTED) {
673         a->roworiented = PETSC_TRUE;
674         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
675   } else if (op == MAT_ROWS_SORTED ||
676              op == MAT_ROWS_UNSORTED ||
677              op == MAT_SYMMETRIC ||
678              op == MAT_STRUCTURALLY_SYMMETRIC ||
679              op == MAT_YES_NEW_DIAGONALS ||
680              op == MAT_USE_HASH_TABLE) {
681     PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
682   } else if (op == MAT_COLUMN_ORIENTED) {
683     a->roworiented = PETSC_FALSE;
684     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
685   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
686     a->donotstash = PETSC_TRUE;
687   } else if (op == MAT_NO_NEW_DIAGONALS) {
688     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
689   } else {
690     SETERRQ(PETSC_ERR_SUP,"unknown option");
691   }
692   PetscFunctionReturn(0);
693 }
694 
695 #undef __FUNC__
696 #define __FUNC__ "MatGetOwnershipRange_MPIDense"
697 int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
698 {
699   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
700 
701   PetscFunctionBegin;
702   if (m) *m = mat->rstart;
703   if (n) *n = mat->rend;
704   PetscFunctionReturn(0);
705 }
706 
707 #undef __FUNC__
708 #define __FUNC__ "MatGetRow_MPIDense"
709 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
710 {
711   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
712   int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;
713 
714   PetscFunctionBegin;
715   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
716   lrow = row - rstart;
717   ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
718   PetscFunctionReturn(0);
719 }
720 
721 #undef __FUNC__
722 #define __FUNC__ "MatRestoreRow_MPIDense"
723 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
724 {
725   int ierr;
726 
727   PetscFunctionBegin;
728   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
729   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
730   PetscFunctionReturn(0);
731 }
732 
733 #undef __FUNC__
734 #define __FUNC__ "MatDiagonalScale_MPIDense"
735 int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
736 {
737   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
738   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
739   Scalar       *l,*r,x,*v;
740   int          ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;
741 
742   PetscFunctionBegin;
743   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
744   if (ll) {
745     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
746     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
747     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
748     for (i=0; i<m; i++) {
749       x = l[i];
750       v = mat->v + i;
751       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
752     }
753     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
754     PetscLogFlops(n*m);
755   }
756   if (rr) {
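    /* each process scales all N columns of its local block, so first gather the whole of the
       distributed right scaling vector rr into lvec */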
757     ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
758     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
759     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
760     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
761     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
762     for (i=0; i<n; i++) {
763       x = r[i];
764       v = mat->v + i*m;
765       for (j=0; j<m; j++) { (*v++) *= x;}
766     }
767     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
768     PetscLogFlops(n*m);
769   }
770   PetscFunctionReturn(0);
771 }
772 
773 #undef __FUNC__
774 #define __FUNC__ "MatNorm_MPIDense"
775 int MatNorm_MPIDense(Mat A,NormType type,PetscReal *norm)
776 {
777   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
778   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
779   int          ierr,i,j;
780   PetscReal    sum = 0.0;
781   Scalar       *v = mat->v;
782 
783   PetscFunctionBegin;
784   if (mdn->size == 1) {
785     ierr =  MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
786   } else {
787     if (type == NORM_FROBENIUS) {
788       for (i=0; i<mdn->A->n*mdn->A->m; i++) {
789 #if defined(PETSC_USE_COMPLEX)
790         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
791 #else
792         sum += (*v)*(*v); v++;
793 #endif
794       }
795       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
796       *norm = sqrt(*norm);
797       PetscLogFlops(2*mdn->A->n*mdn->A->m);
798     } else if (type == NORM_1) {
799       PetscReal *tmp,*tmp2;
800       ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
801       tmp2 = tmp + A->N;
802       ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
803       *norm = 0.0;
804       v = mat->v;
805       for (j=0; j<mdn->A->n; j++) {
806         for (i=0; i<mdn->A->m; i++) {
807           tmp[j] += PetscAbsScalar(*v);  v++;
808         }
809       }
810       ierr = MPI_Allreduce(tmp,tmp2,A->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
811       for (j=0; j<A->N; j++) {
812         if (tmp2[j] > *norm) *norm = tmp2[j];
813       }
814       ierr = PetscFree(tmp);CHKERRQ(ierr);
815       PetscLogFlops(A->n*A->m);
816     } else if (type == NORM_INFINITY) { /* max row norm */
817       PetscReal ntemp;
818       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
819       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
820     } else {
821       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
822     }
823   }
824   PetscFunctionReturn(0);
825 }
826 
827 #undef __FUNC__
828 #define __FUNC__ "MatTranspose_MPIDense"
829 int MatTranspose_MPIDense(Mat A,Mat *matout)
830 {
831   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
832   Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
833   Mat          B;
834   int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
835   int          j,i,ierr;
836   Scalar       *v;
837 
838   PetscFunctionBegin;
839   if (!matout && M != N) {
840     SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
841   }
842   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);
843 
844   m = a->A->m; n = a->A->n; v = Aloc->v;
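  /* column j of the local block becomes part of row j of B; its m entries land in the global
     columns rstart..rstart+m-1 owned by this process */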
845   ierr = PetscMalloc((m+1)*sizeof(int),&rwork);CHKERRQ(ierr);
846   for (j=0; j<n; j++) {
847     for (i=0; i<m; i++) rwork[i] = rstart + i;
848     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
849     v   += m;
850   }
851   ierr = PetscFree(rwork);CHKERRQ(ierr);
852   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
853   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
854   if (matout) {
855     *matout = B;
856   } else {
857     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
858   }
859   PetscFunctionReturn(0);
860 }
861 
862 #include "petscblaslapack.h"
863 #undef __FUNC__
864 #define __FUNC__ "MatScale_MPIDense"
865 int MatScale_MPIDense(Scalar *alpha,Mat inA)
866 {
867   Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
868   Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
869   int          one = 1,nz;
870 
871   PetscFunctionBegin;
872   nz = inA->m*inA->N;
873   BLscal_(&nz,alpha,a->v,&one);
874   PetscLogFlops(nz);
875   PetscFunctionReturn(0);
876 }
877 
878 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
879 EXTERN int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);
880 
881 #undef __FUNC__
882 #define __FUNC__ "MatSetUpPreallocation_MPIDense"
883 int MatSetUpPreallocation_MPIDense(Mat A)
884 {
885   int        ierr;
886 
887   PetscFunctionBegin;
888   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
889   PetscFunctionReturn(0);
890 }
891 
892 /* -------------------------------------------------------------------*/
893 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
894        MatGetRow_MPIDense,
895        MatRestoreRow_MPIDense,
896        MatMult_MPIDense,
897        MatMultAdd_MPIDense,
898        MatMultTranspose_MPIDense,
899        MatMultTransposeAdd_MPIDense,
900        0,
901        0,
902        0,
903        0,
904        0,
905        0,
906        0,
907        MatTranspose_MPIDense,
908        MatGetInfo_MPIDense,0,
909        MatGetDiagonal_MPIDense,
910        MatDiagonalScale_MPIDense,
911        MatNorm_MPIDense,
912        MatAssemblyBegin_MPIDense,
913        MatAssemblyEnd_MPIDense,
914        0,
915        MatSetOption_MPIDense,
916        MatZeroEntries_MPIDense,
917        MatZeroRows_MPIDense,
918        0,
919        0,
920        0,
921        0,
922        MatSetUpPreallocation_MPIDense,
923        0,
924        MatGetOwnershipRange_MPIDense,
925        0,
926        0,
927        MatGetArray_MPIDense,
928        MatRestoreArray_MPIDense,
929        MatDuplicate_MPIDense,
930        0,
931        0,
932        0,
933        0,
934        0,
935        MatGetSubMatrices_MPIDense,
936        0,
937        MatGetValues_MPIDense,
938        0,
939        0,
940        MatScale_MPIDense,
941        0,
942        0,
943        0,
944        MatGetBlockSize_MPIDense,
945        0,
946        0,
947        0,
948        0,
949        0,
950        0,
951        0,
952        0,
953        0,
954        MatGetSubMatrix_MPIDense,
955        MatDestroy_MPIDense,
956        MatView_MPIDense,
957        MatGetMaps_Petsc};
958 
959 EXTERN_C_BEGIN
960 #undef __FUNC__
961 #define __FUNC__ "MatCreate_MPIDense"
962 int MatCreate_MPIDense(Mat mat)
963 {
964   Mat_MPIDense *a;
965   int          ierr,i;
966 
967   PetscFunctionBegin;
968   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
969   mat->data         = (void*)a;
970   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
971   mat->factor       = 0;
972   mat->mapping      = 0;
973 
974   a->factor       = 0;
975   mat->insertmode = NOT_SET_VALUES;
976   ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
977   ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);
978 
979   ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
980   ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
981   a->nvec = mat->n;
982 
983   /* the information in the maps duplicates the ownership information computed below; eventually
984      the duplicate information that is not contained in the maps should be removed */
985   ierr = MapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
986   ierr = MapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);
987 
988   /* build local table of row and column ownerships */
989   ierr       = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
990   a->cowners = a->rowners + a->size + 1;
991   PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
992   ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
993   a->rowners[0] = 0;
994   for (i=2; i<=a->size; i++) {
995     a->rowners[i] += a->rowners[i-1];
996   }
997   a->rstart = a->rowners[a->rank];
998   a->rend   = a->rowners[a->rank+1];
999   ierr      = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
1000   a->cowners[0] = 0;
1001   for (i=2; i<=a->size; i++) {
1002     a->cowners[i] += a->cowners[i-1];
1003   }
1004 
1005   /* build cache for off array entries formed */
1006   a->donotstash = PETSC_FALSE;
1007   ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);
1008 
1009   /* stuff used for matrix vector multiply */
1010   a->lvec        = 0;
1011   a->Mvctx       = 0;
1012   a->roworiented = PETSC_TRUE;
1013 
1014   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1015                                      "MatGetDiagonalBlock_MPIDense",
1016                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1017   PetscFunctionReturn(0);
1018 }
1019 EXTERN_C_END
1020 
1021 #undef __FUNC__
1022 #define __FUNC__ "MatMPIDenseSetPreallocation"
1023 /*@C
1024    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1025 
1026    Not collective
1027 
1028    Input Parameters:
1029 .  A - the matrix
1030 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1031    to control all matrix memory allocation.
1032 
1033    Notes:
1034    The dense format is fully compatible with standard Fortran 77
1035    storage by columns.
1036 
1037    The data input variable is intended primarily for Fortran programmers
1038    who wish to allocate their own matrix memory space.  Most users should
1039    set data=PETSC_NULL.
1040 
1041    Level: intermediate
1042 
1043 .keywords: matrix,dense, parallel
1044 
1045 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1046 @*/
1047 int MatMPIDenseSetPreallocation(Mat mat,Scalar *data)
1048 {
1049   Mat_MPIDense *a;
1050   int          ierr;
1051   PetscTruth   flg2;
1052 
1053   PetscFunctionBegin;
1054   ierr = PetscTypeCompare((PetscObject)mat,MATMPIDENSE,&flg2);CHKERRQ(ierr);
1055   if (!flg2) PetscFunctionReturn(0);
1056   mat->preallocated = PETSC_TRUE;
1057   /* Note:  For now, when data is specified above, this assumes the user correctly
1058    allocates the local dense storage space.  We should add error checking. */
1059 
1060   a    = (Mat_MPIDense*)mat->data;
1061   ierr = MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);CHKERRQ(ierr);
1062   PetscLogObjectParent(mat,a->A);
1063   PetscFunctionReturn(0);
1064 }
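/*
   A minimal usage sketch (illustrative only; the communicator, sizes, and error handling
   are assumptions, not taken from this file):

      Mat A;
      ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);

   Passing PETSC_NULL lets PETSc allocate the local storage; passing a non-null pointer
   instead hands PETSc an already allocated, column-major array of size
   (local rows) x (global columns).
*/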
1065 
1066 #undef __FUNC__
1067 #define __FUNC__ "MatCreateMPIDense"
1068 /*@C
1069    MatCreateMPIDense - Creates a parallel matrix in dense format.
1070 
1071    Collective on MPI_Comm
1072 
1073    Input Parameters:
1074 +  comm - MPI communicator
1075 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1076 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1077 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1078 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1079 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1080    to control all matrix memory allocation.
1081 
1082    Output Parameter:
1083 .  A - the matrix
1084 
1085    Notes:
1086    The dense format is fully compatible with standard Fortran 77
1087    storage by columns.
1088 
1089    The data input variable is intended primarily for Fortran programmers
1090    who wish to allocate their own matrix memory space.  Most users should
1091    set data=PETSC_NULL.
1092 
1093    The user MUST specify either the local or global matrix dimensions
1094    (possibly both).
1095 
1096    Level: intermediate
1097 
1098 .keywords: matrix,dense, parallel
1099 
1100 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1101 @*/
1102 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
1103 {
1104   int ierr,size;
1105 
1106   PetscFunctionBegin;
1107   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1108   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1109   if (size > 1) {
1110     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1111     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1112   } else {
1113     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1114     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1115   }
1116   PetscFunctionReturn(0);
1117 }
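/*
   Example (a sketch; PETSC_COMM_WORLD and the dimensions M,N are placeholders):

      Mat A;
      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
      ... set entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ...
      ierr = MatDestroy(A);CHKERRQ(ierr);

   On a single process the routine creates a MATSEQDENSE matrix instead, as the code above shows.
*/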
1118 
1119 #undef __FUNC__
1120 #define __FUNC__ "MatDuplicate_MPIDense"
1121 static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1122 {
1123   Mat          mat;
1124   Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1125   int          ierr;
1126 
1127   PetscFunctionBegin;
1128   *newmat       = 0;
1129   ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
1130   ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr);
1131   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1132   mat->data         = (void*)a;
1133   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1134   mat->factor       = A->factor;
1135   mat->assembled    = PETSC_TRUE;
1136   mat->preallocated = PETSC_TRUE;
1137 
1138   a->rstart       = oldmat->rstart;
1139   a->rend         = oldmat->rend;
1140   a->size         = oldmat->size;
1141   a->rank         = oldmat->rank;
1142   mat->insertmode = NOT_SET_VALUES;
1143   a->nvec         = oldmat->nvec;
1144   a->donotstash   = oldmat->donotstash;
1145   ierr            = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);CHKERRQ(ierr);
1146   PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1147   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
1148   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1149 
1150   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1151   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1152   PetscLogObjectParent(mat,a->A);
1153   *newmat = mat;
1154   PetscFunctionReturn(0);
1155 }
1156 
1157 #include "petscsys.h"
1158 
1159 #undef __FUNC__
1160 #define __FUNC__ "MatLoad_MPIDense_DenseInFile"
1161 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
1162 {
1163   int        *rowners,i,size,rank,m,ierr,nz,j;
1164   Scalar     *array,*vals,*vals_ptr;
1165   MPI_Status status;
1166 
1167   PetscFunctionBegin;
1168   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1169   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1170 
1171   /* determine ownership of all rows */
1172   m          = M/size + ((M % size) > rank);
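  /* each process gets M/size rows, and the first M%size processes take one extra row */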
1173   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1174   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1175   rowners[0] = 0;
1176   for (i=2; i<=size; i++) {
1177     rowners[i] += rowners[i-1];
1178   }
1179 
1180   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1181   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1182 
1183   if (!rank) {
1184     ierr = PetscMalloc(m*N*sizeof(Scalar),&vals);CHKERRQ(ierr);
1185 
1186     /* read in my part of the matrix numerical values  */
1187     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1188 
1189     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1190     vals_ptr = vals;
1191     for (i=0; i<m; i++) {
1192       for (j=0; j<N; j++) {
1193         array[i + j*m] = *vals_ptr++;
1194       }
1195     }
1196 
1197     /* read in the other processors' parts and ship them out */
1198     for (i=1; i<size; i++) {
1199       nz   = (rowners[i+1] - rowners[i])*N;
1200       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1201       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1202     }
1203   } else {
1204     /* receive numeric values */
1205     ierr = PetscMalloc(m*N*sizeof(Scalar),&vals);CHKERRQ(ierr);
1206 
1207     /* receive message of values*/
1208     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1209 
1210     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1211     vals_ptr = vals;
1212     for (i=0; i<m; i++) {
1213       for (j=0; j<N; j++) {
1214         array[i + j*m] = *vals_ptr++;
1215       }
1216     }
1217   }
1218   ierr = PetscFree(rowners);CHKERRQ(ierr);
1219   ierr = PetscFree(vals);CHKERRQ(ierr);
1220   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1221   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1222   PetscFunctionReturn(0);
1223 }
1224 
1225 EXTERN_C_BEGIN
1226 #undef __FUNC__
1227 #define __FUNC__ "MatLoad_MPIDense"
1228 int MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
1229 {
1230   Mat          A;
1231   Scalar       *vals,*svals;
1232   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1233   MPI_Status   status;
1234   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1235   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1236   int          tag = ((PetscObject)viewer)->tag;
1237   int          i,nz,ierr,j,rstart,rend,fd;
1238 
1239   PetscFunctionBegin;
1240   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1241   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1242   if (!rank) {
1243     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1244     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1245     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1246   }
1247 
1248   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1249   M = header[1]; N = header[2]; nz = header[3];
1250 
1251   /*
1252        Handle case where matrix is stored on disk as a dense matrix
1253   */
1254   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1255     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1256     PetscFunctionReturn(0);
1257   }
1258 
1259   /* determine ownership of all rows */
1260   m          = M/size + ((M % size) > rank);
1261   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1262   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1263   rowners[0] = 0;
1264   for (i=2; i<=size; i++) {
1265     rowners[i] += rowners[i-1];
1266   }
1267   rstart = rowners[rank];
1268   rend   = rowners[rank+1];
1269 
1270   /* distribute row lengths to all processors */
1271   ierr    = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);CHKERRQ(ierr);
1272   offlens = ourlens + (rend-rstart);
1273   if (!rank) {
1274     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1275     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1276     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1277     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1278     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1279     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1280   } else {
1281     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1282   }
1283 
1284   if (!rank) {
1285     /* calculate the number of nonzeros on each processor */
1286     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1287     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1288     for (i=0; i<size; i++) {
1289       for (j=rowners[i]; j< rowners[i+1]; j++) {
1290         procsnz[i] += rowlengths[j];
1291       }
1292     }
1293     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1294 
1295     /* determine max buffer needed and allocate it */
1296     maxnz = 0;
1297     for (i=0; i<size; i++) {
1298       maxnz = PetscMax(maxnz,procsnz[i]);
1299     }
1300     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1301 
1302     /* read in my part of the matrix column indices  */
1303     nz = procsnz[0];
1304     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1305     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1306 
1307     /* read in everyone else's part and ship it off */
1308     for (i=1; i<size; i++) {
1309       nz   = procsnz[i];
1310       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1311       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1312     }
1313     ierr = PetscFree(cols);CHKERRQ(ierr);
1314   } else {
1315     /* determine buffer space needed for message */
1316     nz = 0;
1317     for (i=0; i<m; i++) {
1318       nz += ourlens[i];
1319     }
1320     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1321 
1322     /* receive message of column indices*/
1323     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1324     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1325     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1326   }
1327 
1328   /* loop over local rows, determining number of off diagonal entries */
1329   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1330   jj = 0;
1331   for (i=0; i<m; i++) {
1332     for (j=0; j<ourlens[i]; j++) {
1333       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1334       jj++;
1335     }
1336   }
1337 
1338   /* create our matrix */
1339   for (i=0; i<m; i++) {
1340     ourlens[i] -= offlens[i];
1341   }
1342   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1343   A = *newmat;
1344   for (i=0; i<m; i++) {
1345     ourlens[i] += offlens[i];
1346   }
1347 
1348   if (!rank) {
1349     ierr = PetscMalloc(maxnz*sizeof(Scalar),&vals);CHKERRQ(ierr);
1350 
1351     /* read in my part of the matrix numerical values  */
1352     nz = procsnz[0];
1353     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1354 
1355     /* insert into matrix */
1356     jj      = rstart;
1357     smycols = mycols;
1358     svals   = vals;
1359     for (i=0; i<m; i++) {
1360       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1361       smycols += ourlens[i];
1362       svals   += ourlens[i];
1363       jj++;
1364     }
1365 
1366     /* read in the other processors' parts and ship them out */
1367     for (i=1; i<size; i++) {
1368       nz   = procsnz[i];
1369       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1370       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1371     }
1372     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1373   } else {
1374     /* receive numeric values */
1375     ierr = PetscMalloc(nz*sizeof(Scalar),&vals);CHKERRQ(ierr);
1376 
1377     /* receive message of values*/
1378     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1379     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1380     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1381 
1382     /* insert into matrix */
1383     jj      = rstart;
1384     smycols = mycols;
1385     svals   = vals;
1386     for (i=0; i<m; i++) {
1387       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1388       smycols += ourlens[i];
1389       svals   += ourlens[i];
1390       jj++;
1391     }
1392   }
1393   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1394   ierr = PetscFree(vals);CHKERRQ(ierr);
1395   ierr = PetscFree(mycols);CHKERRQ(ierr);
1396   ierr = PetscFree(rowners);CHKERRQ(ierr);
1397 
1398   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1399   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1400   PetscFunctionReturn(0);
1401 }
1402 EXTERN_C_END
1403 
1404 
1405 
1406 
1407 
1408