xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 4b27c08ac4c7fe27d83aaa07ba0dc5abca4ca6c8)
1 /*$Id: mpidense.c,v 1.159 2001/08/10 03:30:41 bsmith Exp $*/
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 #include "src/mat/impls/dense/mpi/mpidense.h"
8 #include "src/vec/vecimpl.h"
9 
10 EXTERN_C_BEGIN
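/*
   Note: MatGetDiagonalBlock_MPIDense returns, for a square matrix, the m x m diagonal
   block owned by this process as a sequential dense matrix.  The block is built with
   MatCreateSeqDense() on the local array starting at column rstart of the local
   m x N storage, so no values need to be moved.
*/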
11 #undef __FUNCT__
12 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
13 int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
14 {
15   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
16   int          m = A->m,rstart = mdn->rstart,ierr;
17   PetscScalar  *array;
18   MPI_Comm     comm;
19 
20   PetscFunctionBegin;
21   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
22 
23   /* The reuse aspect is not implemented efficiently */
24   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
25 
26   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
27   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
28   ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
29   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
30   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
31   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
32 
33   *iscopy = PETSC_TRUE;
34   PetscFunctionReturn(0);
35 }
36 EXTERN_C_END
37 
38 EXTERN int MatSetUpMultiply_MPIDense(Mat);
39 
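/*
   Note: in MatSetValues_MPIDense, entries whose global row lies in [rstart,rend) are
   written directly into the local SeqDense block; rows owned by other processes are
   placed in the matrix stash (unless MAT_IGNORE_OFF_PROC_ENTRIES was set) and are
   shipped to their owners during MatAssemblyBegin()/MatAssemblyEnd().  When the matrix
   is column oriented, the input array v is addressed as v[i + j*m] rather than v[i*n + j].
*/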
40 #undef __FUNCT__
41 #define __FUNCT__ "MatSetValues_MPIDense"
42 int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v,InsertMode addv)
43 {
44   Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
45   int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
46   PetscTruth   roworiented = A->roworiented;
47 
48   PetscFunctionBegin;
49   for (i=0; i<m; i++) {
50     if (idxm[i] < 0) continue;
51     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
52     if (idxm[i] >= rstart && idxm[i] < rend) {
53       row = idxm[i] - rstart;
54       if (roworiented) {
55         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
56       } else {
57         for (j=0; j<n; j++) {
58           if (idxn[j] < 0) continue;
59           if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
60           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
61         }
62       }
63     } else {
64       if (!A->donotstash) {
65         if (roworiented) {
66           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
67         } else {
68           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
69         }
70       }
71     }
72   }
73   PetscFunctionReturn(0);
74 }
75 
76 #undef __FUNCT__
77 #define __FUNCT__ "MatGetValues_MPIDense"
78 int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
79 {
80   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
81   int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;
82 
83   PetscFunctionBegin;
84   for (i=0; i<m; i++) {
85     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
86     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
87     if (idxm[i] >= rstart && idxm[i] < rend) {
88       row = idxm[i] - rstart;
89       for (j=0; j<n; j++) {
90         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
91         if (idxn[j] >= mat->N) {
92           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
93         }
94         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
95       }
96     } else {
97       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
98     }
99   }
100   PetscFunctionReturn(0);
101 }
102 
103 #undef __FUNCT__
104 #define __FUNCT__ "MatGetArray_MPIDense"
105 int MatGetArray_MPIDense(Mat A,PetscScalar **array)
106 {
107   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
108   int          ierr;
109 
110   PetscFunctionBegin;
111   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
112   PetscFunctionReturn(0);
113 }
114 
115 #undef __FUNCT__
116 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
117 static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
118 {
119   Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
120   Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
121   int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
122   PetscScalar  *av,*bv,*v = lmat->v;
123   Mat          newmat;
124 
125   PetscFunctionBegin;
126   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
127   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
128   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
129   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
130 
131   /* No parallel redistribution is currently supported! Should really check each index set
132      to confirm that it is OK.  Currently supports only submatrices with the same
133      partitioning as the original matrix! */
134 
135   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
136   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
137 
138   /* Check submatrix call */
139   if (scall == MAT_REUSE_MATRIX) {
140     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
141     /* Really need to test rows and column sizes! */
142     newmat = *B;
143   } else {
144     /* Create and fill new matrix */
145     ierr = MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);CHKERRQ(ierr);
146   }
147 
148   /* Now extract the data pointers and do the copy, column at a time */
149   newmatd = (Mat_MPIDense*)newmat->data;
150   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
151 
152   for (i=0; i<ncols; i++) {
153     av = v + nlrows*icol[i];
154     for (j=0; j<nrows; j++) {
155       *bv++ = av[irow[j] - rstart];
156     }
157   }
158 
159   /* Assemble the matrices so that the correct flags are set */
160   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
161   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
162 
163   /* Free work space */
164   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
165   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
166   *B = newmat;
167   PetscFunctionReturn(0);
168 }
169 
170 #undef __FUNCT__
171 #define __FUNCT__ "MatRestoreArray_MPIDense"
172 int MatRestoreArray_MPIDense(Mat A,PetscScalar **array)
173 {
174   PetscFunctionBegin;
175   PetscFunctionReturn(0);
176 }
177 
178 #undef __FUNCT__
179 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
180 int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
181 {
182   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
183   MPI_Comm     comm = mat->comm;
184   int          ierr,nstash,reallocs;
185   InsertMode   addv;
186 
187   PetscFunctionBegin;
188   /* make sure all processors are in the same mode (all INSERT_VALUES or all ADD_VALUES); the OR-reduction below detects a mix */
189   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
190   if (addv == (ADD_VALUES|INSERT_VALUES)) {
191     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
192   }
193   mat->insertmode = addv; /* in case this processor had no cache */
194 
195   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
196   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
197   PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
198   PetscFunctionReturn(0);
199 }
200 
201 #undef __FUNCT__
202 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
203 int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
204 {
205   Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
206   int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
207   PetscScalar  *val;
208   InsertMode   addv=mat->insertmode;
209 
210   PetscFunctionBegin;
211   /*  wait on receives */
212   while (1) {
213     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
214     if (!flg) break;
215 
216     for (i=0; i<n;) {
217       /* Now identify the consecutive vals belonging to the same row */
218       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
219       if (j < n) ncols = j-i;
220       else       ncols = n-i;
221       /* Now assemble all these values with a single function call */
222       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
223       i = j;
224     }
225   }
226   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
227 
228   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
229   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
230 
231   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
232     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
233   }
234   PetscFunctionReturn(0);
235 }
236 
237 #undef __FUNCT__
238 #define __FUNCT__ "MatZeroEntries_MPIDense"
239 int MatZeroEntries_MPIDense(Mat A)
240 {
241   int          ierr;
242   Mat_MPIDense *l = (Mat_MPIDense*)A->data;
243 
244   PetscFunctionBegin;
245   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
246   PetscFunctionReturn(0);
247 }
248 
249 #undef __FUNCT__
250 #define __FUNCT__ "MatGetBlockSize_MPIDense"
251 int MatGetBlockSize_MPIDense(Mat A,int *bs)
252 {
253   PetscFunctionBegin;
254   *bs = 1;
255   PetscFunctionReturn(0);
256 }
257 
258 /* the code does not do the diagonal entries correctly unless the
259    matrix is square and the column and row ownerships are identical.
260    This is a BUG. The only way to fix it seems to be to access
261    mdn->A and mdn->B directly and not through the MatZeroRows()
262    routine.
263 */
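/*
   Any process may list rows to be zeroed.  Each process first determines which process
   owns each row it was given, ships those indices to the owners with nonblocking
   sends/receives, and then zeroes the rows it received by calling MatZeroRows() on the
   local sequential block.
*/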
264 #undef __FUNCT__
265 #define __FUNCT__ "MatZeroRows_MPIDense"
266 int MatZeroRows_MPIDense(Mat A,IS is,PetscScalar *diag)
267 {
268   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
269   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
270   int            *procs,*nprocs,j,idx,nsends,*work;
271   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
272   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
273   int            *lens,imdex,*lrows,*values;
274   MPI_Comm       comm = A->comm;
275   MPI_Request    *send_waits,*recv_waits;
276   MPI_Status     recv_status,*send_status;
277   IS             istmp;
278   PetscTruth     found;
279 
280   PetscFunctionBegin;
281   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
282   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
283 
284   /*  first count number of contributors to each processor */
285   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
286   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
287   procs = nprocs + size;
288   ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
289   for (i=0; i<N; i++) {
290     idx = rows[i];
291     found = PETSC_FALSE;
292     for (j=0; j<size; j++) {
293       if (idx >= owners[j] && idx < owners[j+1]) {
294         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
295       }
296     }
297     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
298   }
299   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
300 
301   /* inform other processors of the number of messages and the max length */
302   ierr   = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
303   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
304   nmax   = work[rank];
305   nrecvs = work[size+rank];
306   ierr   = PetscFree(work);CHKERRQ(ierr);
307 
308   /* post receives:   */
309   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
310   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
311   for (i=0; i<nrecvs; i++) {
312     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
313   }
314 
315   /* do sends:
316       1) starts[i] gives the starting index in svalues for stuff going to
317          the ith processor
318   */
319   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
320   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
321   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
322   starts[0]  = 0;
323   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
324   for (i=0; i<N; i++) {
325     svalues[starts[owner[i]]++] = rows[i];
326   }
327   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
328 
329   starts[0] = 0;
330   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
331   count = 0;
332   for (i=0; i<size; i++) {
333     if (procs[i]) {
334       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
335     }
336   }
337   ierr = PetscFree(starts);CHKERRQ(ierr);
338 
339   base = owners[rank];
340 
341   /*  wait on receives */
342   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
343   source = lens + nrecvs;
344   count  = nrecvs; slen = 0;
345   while (count) {
346     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
347     /* unpack receives into our local space */
348     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
349     source[imdex]  = recv_status.MPI_SOURCE;
350     lens[imdex]  = n;
351     slen += n;
352     count--;
353   }
354   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
355 
356   /* move the data into the send scatter */
357   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
358   count = 0;
359   for (i=0; i<nrecvs; i++) {
360     values = rvalues + i*nmax;
361     for (j=0; j<lens[i]; j++) {
362       lrows[count++] = values[j] - base;
363     }
364   }
365   ierr = PetscFree(rvalues);CHKERRQ(ierr);
366   ierr = PetscFree(lens);CHKERRQ(ierr);
367   ierr = PetscFree(owner);CHKERRQ(ierr);
368   ierr = PetscFree(nprocs);CHKERRQ(ierr);
369 
370   /* actually zap the local rows */
371   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
372   PetscLogObjectParent(A,istmp);
373   ierr = PetscFree(lrows);CHKERRQ(ierr);
374   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
375   ierr = ISDestroy(istmp);CHKERRQ(ierr);
376 
377   /* wait on sends */
378   if (nsends) {
379     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
380     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
381     ierr = PetscFree(send_status);CHKERRQ(ierr);
382   }
383   ierr = PetscFree(send_waits);CHKERRQ(ierr);
384   ierr = PetscFree(svalues);CHKERRQ(ierr);
385 
386   PetscFunctionReturn(0);
387 }
388 
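/*
   Each process stores complete rows of the matrix (a local m x N block).  MatMult
   therefore gathers the entire input vector into the local work vector lvec through
   the Mvctx scatter and then performs a local sequential dense multiply; the transpose
   products do the reverse, adding the locally computed result back into the distributed
   output vector with a reverse scatter.
*/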
389 #undef __FUNCT__
390 #define __FUNCT__ "MatMult_MPIDense"
391 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
392 {
393   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
394   int          ierr;
395 
396   PetscFunctionBegin;
397   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
398   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
399   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
400   PetscFunctionReturn(0);
401 }
402 
403 #undef __FUNCT__
404 #define __FUNCT__ "MatMultAdd_MPIDense"
405 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
406 {
407   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
408   int          ierr;
409 
410   PetscFunctionBegin;
411   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
412   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
413   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
414   PetscFunctionReturn(0);
415 }
416 
417 #undef __FUNCT__
418 #define __FUNCT__ "MatMultTranspose_MPIDense"
419 int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
420 {
421   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
422   int          ierr;
423   PetscScalar  zero = 0.0;
424 
425   PetscFunctionBegin;
426   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
427   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
428   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
429   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
430   PetscFunctionReturn(0);
431 }
432 
433 #undef __FUNCT__
434 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
435 int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
436 {
437   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
438   int          ierr;
439 
440   PetscFunctionBegin;
441   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
442   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
443   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
444   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
445   PetscFunctionReturn(0);
446 }
447 
448 #undef __FUNCT__
449 #define __FUNCT__ "MatGetDiagonal_MPIDense"
450 int MatGetDiagonal_MPIDense(Mat A,Vec v)
451 {
452   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
453   Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
454   int          ierr,len,i,n,m = A->m,radd;
455   PetscScalar  *x,zero = 0.0;
456 
457   PetscFunctionBegin;
458   ierr = VecSet(&zero,v);CHKERRQ(ierr);
459   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
460   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
461   if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
462   len  = PetscMin(a->A->m,a->A->n);
463   radd = a->rstart*m;
464   for (i=0; i<len; i++) {
465     x[i] = aloc->v[radd + i*m + i];
466   }
467   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
468   PetscFunctionReturn(0);
469 }
470 
471 #undef __FUNCT__
472 #define __FUNCT__ "MatDestroy_MPIDense"
473 int MatDestroy_MPIDense(Mat mat)
474 {
475   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
476   int          ierr;
477 
478   PetscFunctionBegin;
479 
480 #if defined(PETSC_USE_LOG)
481   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
482 #endif
483   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
484   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
485   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
486   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
487   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
488   if (mdn->factor) {
489     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
490     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
491     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
492     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
493   }
494   ierr = PetscFree(mdn);CHKERRQ(ierr);
495   PetscFunctionReturn(0);
496 }
497 
498 #undef __FUNCT__
499 #define __FUNCT__ "MatView_MPIDense_Binary"
500 static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
501 {
502   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
503   int          ierr;
504 
505   PetscFunctionBegin;
506   if (mdn->size == 1) {
507     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
508   }
509   else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
510   PetscFunctionReturn(0);
511 }
512 
513 #undef __FUNCT__
514 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
515 static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
516 {
517   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
518   int               ierr,size = mdn->size,rank = mdn->rank;
519   PetscViewerType   vtype;
520   PetscTruth        isascii,isdraw;
521   PetscViewer       sviewer;
522   PetscViewerFormat format;
523 
524   PetscFunctionBegin;
525   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
526   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
527   if (isascii) {
528     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
529     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
530     if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
531       MatInfo info;
532       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
533       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
534                    (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
535       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
536       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
537       PetscFunctionReturn(0);
538     } else if (format == PETSC_VIEWER_ASCII_INFO) {
539       PetscFunctionReturn(0);
540     }
541   } else if (isdraw) {
542     PetscDraw       draw;
543     PetscTruth isnull;
544 
545     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
546     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
547     if (isnull) PetscFunctionReturn(0);
548   }
549 
550   if (size == 1) {
551     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
552   } else {
553     /* assemble the entire matrix onto first processor. */
554     Mat          A;
555     int          M = mat->M,N = mat->N,m,row,i,nz,*cols;
556     PetscScalar  *vals;
557 
558     if (!rank) {
559       ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
560     } else {
561       ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
562     }
563     PetscLogObjectParent(mat,A);
564 
565     /* Copy the matrix ... This isn't the most efficient means,
566        but it's quick for now */
567     row = mdn->rstart; m = mdn->A->m;
568     for (i=0; i<m; i++) {
569       ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
570       ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
571       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
572       row++;
573     }
574 
575     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
576     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
577     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
578     if (!rank) {
579       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
580     }
581     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
582     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
583     ierr = MatDestroy(A);CHKERRQ(ierr);
584   }
585   PetscFunctionReturn(0);
586 }
587 
588 #undef __FUNCT__
589 #define __FUNCT__ "MatView_MPIDense"
590 int MatView_MPIDense(Mat mat,PetscViewer viewer)
591 {
592   int        ierr;
593   PetscTruth isascii,isbinary,isdraw,issocket;
594 
595   PetscFunctionBegin;
596 
597   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
598   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
599   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
600   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
601 
602   if (isascii || issocket || isdraw) {
603     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
604   } else if (isbinary) {
605     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
606   } else {
607     SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
608   }
609   PetscFunctionReturn(0);
610 }
611 
612 #undef __FUNCT__
613 #define __FUNCT__ "MatGetInfo_MPIDense"
614 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
615 {
616   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
617   Mat          mdn = mat->A;
618   int          ierr;
619   PetscReal    isend[5],irecv[5];
620 
621   PetscFunctionBegin;
622   info->rows_global    = (double)A->M;
623   info->columns_global = (double)A->N;
624   info->rows_local     = (double)A->m;
625   info->columns_local  = (double)A->N;
626   info->block_size     = 1.0;
627   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
628   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
629   isend[3] = info->memory;  isend[4] = info->mallocs;
630   if (flag == MAT_LOCAL) {
631     info->nz_used      = isend[0];
632     info->nz_allocated = isend[1];
633     info->nz_unneeded  = isend[2];
634     info->memory       = isend[3];
635     info->mallocs      = isend[4];
636   } else if (flag == MAT_GLOBAL_MAX) {
637     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
638     info->nz_used      = irecv[0];
639     info->nz_allocated = irecv[1];
640     info->nz_unneeded  = irecv[2];
641     info->memory       = irecv[3];
642     info->mallocs      = irecv[4];
643   } else if (flag == MAT_GLOBAL_SUM) {
644     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
645     info->nz_used      = irecv[0];
646     info->nz_allocated = irecv[1];
647     info->nz_unneeded  = irecv[2];
648     info->memory       = irecv[3];
649     info->mallocs      = irecv[4];
650   }
651   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
652   info->fill_ratio_needed = 0;
653   info->factor_mallocs    = 0;
654   PetscFunctionReturn(0);
655 }
656 
657 #undef __FUNCT__
658 #define __FUNCT__ "MatSetOption_MPIDense"
659 int MatSetOption_MPIDense(Mat A,MatOption op)
660 {
661   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
662   int          ierr;
663 
664   PetscFunctionBegin;
665   switch (op) {
666   case MAT_NO_NEW_NONZERO_LOCATIONS:
667   case MAT_YES_NEW_NONZERO_LOCATIONS:
668   case MAT_NEW_NONZERO_LOCATION_ERR:
669   case MAT_NEW_NONZERO_ALLOCATION_ERR:
670   case MAT_COLUMNS_SORTED:
671   case MAT_COLUMNS_UNSORTED:
672     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
673     break;
674   case MAT_ROW_ORIENTED:
675     a->roworiented = PETSC_TRUE;
676     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
677     break;
678   case MAT_ROWS_SORTED:
679   case MAT_ROWS_UNSORTED:
680   case MAT_YES_NEW_DIAGONALS:
681   case MAT_USE_HASH_TABLE:
682   case MAT_USE_SINGLE_PRECISION_SOLVES:
683     PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
684     break;
685   case MAT_COLUMN_ORIENTED:
686     a->roworiented = PETSC_FALSE;
687     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
688     break;
689   case MAT_IGNORE_OFF_PROC_ENTRIES:
690     a->donotstash = PETSC_TRUE;
691     break;
692   case MAT_NO_NEW_DIAGONALS:
693     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
694   default:
695     SETERRQ(PETSC_ERR_SUP,"unknown option");
696   }
697   PetscFunctionReturn(0);
698 }
699 
700 #undef __FUNCT__
701 #define __FUNCT__ "MatGetRow_MPIDense"
702 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v)
703 {
704   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
705   int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;
706 
707   PetscFunctionBegin;
708   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
709   lrow = row - rstart;
710   ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
711   PetscFunctionReturn(0);
712 }
713 
714 #undef __FUNCT__
715 #define __FUNCT__ "MatRestoreRow_MPIDense"
716 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
717 {
718   int ierr;
719 
720   PetscFunctionBegin;
721   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
722   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
723   PetscFunctionReturn(0);
724 }
725 
726 #undef __FUNCT__
727 #define __FUNCT__ "MatDiagonalScale_MPIDense"
728 int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
729 {
730   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
731   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
732   PetscScalar  *l,*r,x,*v;
733   int          ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;
734 
735   PetscFunctionBegin;
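  /* Left scaling multiplies row i of the local block by l[i] (stride m through the
     column-major storage); right scaling first scatters rr into the work vector lvec
     and then scales whole local columns. */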
736   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
737   if (ll) {
738     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
739     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
740     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
741     for (i=0; i<m; i++) {
742       x = l[i];
743       v = mat->v + i;
744       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
745     }
746     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
747     PetscLogFlops(n*m);
748   }
749   if (rr) {
750     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
751     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
752     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
753     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
754     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
755     for (i=0; i<n; i++) {
756       x = r[i];
757       v = mat->v + i*m;
758       for (j=0; j<m; j++) { (*v++) *= x;}
759     }
760     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
761     PetscLogFlops(n*m);
762   }
763   PetscFunctionReturn(0);
764 }
765 
766 #undef __FUNCT__
767 #define __FUNCT__ "MatNorm_MPIDense"
768 int MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
769 {
770   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
771   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
772   int          ierr,i,j;
773   PetscReal    sum = 0.0;
774   PetscScalar  *v = mat->v;
775 
776   PetscFunctionBegin;
777   if (mdn->size == 1) {
778     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
779   } else {
780     if (type == NORM_FROBENIUS) {
781       for (i=0; i<mdn->A->n*mdn->A->m; i++) {
782 #if defined(PETSC_USE_COMPLEX)
783         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
784 #else
785         sum += (*v)*(*v); v++;
786 #endif
787       }
788       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
789       *nrm = sqrt(*nrm);
790       PetscLogFlops(2*mdn->A->n*mdn->A->m);
791     } else if (type == NORM_1) {
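      /* 1-norm = max over columns of the sum of absolute values: form the local column
         sums, add them across all processes, then take the maximum entry */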
792       PetscReal *tmp,*tmp2;
793       ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
794       tmp2 = tmp + A->N;
795       ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
796       *nrm = 0.0;
797       v = mat->v;
798       for (j=0; j<mdn->A->n; j++) {
799         for (i=0; i<mdn->A->m; i++) {
800           tmp[j] += PetscAbsScalar(*v);  v++;
801         }
802       }
803       ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
804       for (j=0; j<A->N; j++) {
805         if (tmp2[j] > *nrm) *nrm = tmp2[j];
806       }
807       ierr = PetscFree(tmp);CHKERRQ(ierr);
808       PetscLogFlops(A->n*A->m);
809     } else if (type == NORM_INFINITY) { /* max row norm */
810       PetscReal ntemp;
811       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
812       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
813     } else {
814       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
815     }
816   }
817   PetscFunctionReturn(0);
818 }
819 
820 #undef __FUNCT__
821 #define __FUNCT__ "MatTranspose_MPIDense"
822 int MatTranspose_MPIDense(Mat A,Mat *matout)
823 {
824   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
825   Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
826   Mat          B;
827   int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
828   int          j,i,ierr;
829   PetscScalar  *v;
830 
831   PetscFunctionBegin;
832   if (!matout && M != N) {
833     SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
834   }
835   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);
836 
837   m = a->A->m; n = a->A->n; v = Aloc->v;
838   ierr = PetscMalloc(m*sizeof(int),&rwork);CHKERRQ(ierr); /* rwork holds m global row indices */
839   for (j=0; j<n; j++) {
840     for (i=0; i<m; i++) rwork[i] = rstart + i;
841     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
842     v   += m;
843   }
844   ierr = PetscFree(rwork);CHKERRQ(ierr);
845   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
846   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
847   if (matout) {
848     *matout = B;
849   } else {
850     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
851   }
852   PetscFunctionReturn(0);
853 }
854 
855 #include "petscblaslapack.h"
856 #undef __FUNCT__
857 #define __FUNCT__ "MatScale_MPIDense"
858 int MatScale_MPIDense(PetscScalar *alpha,Mat inA)
859 {
860   Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
861   Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
862   int          one = 1,nz;
863 
864   PetscFunctionBegin;
865   nz = inA->m*inA->N;
866   BLscal_(&nz,alpha,a->v,&one);
867   PetscLogFlops(nz);
868   PetscFunctionReturn(0);
869 }
870 
871 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
872 EXTERN int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);
873 
874 #undef __FUNCT__
875 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
876 int MatSetUpPreallocation_MPIDense(Mat A)
877 {
878   int        ierr;
879 
880   PetscFunctionBegin;
881   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
882   PetscFunctionReturn(0);
883 }
884 
885 /* -------------------------------------------------------------------*/
886 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
887        MatGetRow_MPIDense,
888        MatRestoreRow_MPIDense,
889        MatMult_MPIDense,
890        MatMultAdd_MPIDense,
891        MatMultTranspose_MPIDense,
892        MatMultTransposeAdd_MPIDense,
893        0,
894        0,
895        0,
896        0,
897        0,
898        0,
899        0,
900        MatTranspose_MPIDense,
901        MatGetInfo_MPIDense,0,
902        MatGetDiagonal_MPIDense,
903        MatDiagonalScale_MPIDense,
904        MatNorm_MPIDense,
905        MatAssemblyBegin_MPIDense,
906        MatAssemblyEnd_MPIDense,
907        0,
908        MatSetOption_MPIDense,
909        MatZeroEntries_MPIDense,
910        MatZeroRows_MPIDense,
911        0,
912        0,
913        0,
914        0,
915        MatSetUpPreallocation_MPIDense,
916        0,
917        0,
918        MatGetArray_MPIDense,
919        MatRestoreArray_MPIDense,
920        MatDuplicate_MPIDense,
921        0,
922        0,
923        0,
924        0,
925        0,
926        MatGetSubMatrices_MPIDense,
927        0,
928        MatGetValues_MPIDense,
929        0,
930        0,
931        MatScale_MPIDense,
932        0,
933        0,
934        0,
935        MatGetBlockSize_MPIDense,
936        0,
937        0,
938        0,
939        0,
940        0,
941        0,
942        0,
943        0,
944        0,
945        MatGetSubMatrix_MPIDense,
946        MatDestroy_MPIDense,
947        MatView_MPIDense,
948        MatGetPetscMaps_Petsc};
949 
950 EXTERN_C_BEGIN
951 #undef __FUNCT__
952 #define __FUNCT__ "MatCreate_MPIDense"
953 int MatCreate_MPIDense(Mat mat)
954 {
955   Mat_MPIDense *a;
956   int          ierr,i;
957 
958   PetscFunctionBegin;
959   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
960   mat->data         = (void*)a;
961   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
962   mat->factor       = 0;
963   mat->mapping      = 0;
964 
965   a->factor       = 0;
966   mat->insertmode = NOT_SET_VALUES;
967   ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
968   ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);
969 
970   ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
971   ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
972   a->nvec = mat->n;
973 
974   /* the information in the maps duplicates the information computed below, eventually
975      we should remove the duplicate information that is not contained in the maps */
976   ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
977   ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);
978 
979   /* build local table of row and column ownerships */
980   ierr       = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
981   a->cowners = a->rowners + a->size + 1;
982   PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
983   ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
984   a->rowners[0] = 0;
985   for (i=2; i<=a->size; i++) {
986     a->rowners[i] += a->rowners[i-1];
987   }
988   a->rstart = a->rowners[a->rank];
989   a->rend   = a->rowners[a->rank+1];
990   ierr      = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
991   a->cowners[0] = 0;
992   for (i=2; i<=a->size; i++) {
993     a->cowners[i] += a->cowners[i-1];
994   }
995 
996   /* build cache for off array entries formed */
997   a->donotstash = PETSC_FALSE;
998   ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);
999 
1000   /* stuff used for matrix vector multiply */
1001   a->lvec        = 0;
1002   a->Mvctx       = 0;
1003   a->roworiented = PETSC_TRUE;
1004 
1005   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1006                                      "MatGetDiagonalBlock_MPIDense",
1007                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1008   PetscFunctionReturn(0);
1009 }
1010 EXTERN_C_END
1011 
1012 #undef __FUNCT__
1013 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1014 /*@C
1015    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1016 
1017    Not collective
1018 
1019    Input Parameters:
1020 .  A - the matrix
1021 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1022    to control all matrix memory allocation.
1023 
1024    Notes:
1025    The dense format is fully compatible with standard Fortran 77
1026    storage by columns.
1027 
1028    The data input variable is intended primarily for Fortran programmers
1029    who wish to allocate their own matrix memory space.  Most users should
1030    set data=PETSC_NULL.
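
   Example of usage (a minimal illustrative sketch; the communicator, the 8x8 size and
   the error handling shown are placeholders, not taken from an existing example):

      Mat A;
      int ierr;

      ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,8,8,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);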
1031 
1032    Level: intermediate
1033 
1034 .keywords: matrix,dense, parallel
1035 
1036 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1037 @*/
1038 int MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1039 {
1040   Mat_MPIDense *a;
1041   int          ierr;
1042   PetscTruth   flg2;
1043 
1044   PetscFunctionBegin;
1045   ierr = PetscTypeCompare((PetscObject)mat,MATMPIDENSE,&flg2);CHKERRQ(ierr);
1046   if (!flg2) PetscFunctionReturn(0);
1047   mat->preallocated = PETSC_TRUE;
1048   /* Note:  For now, when data is specified above, this assumes the user correctly
1049    allocates the local dense storage space.  We should add error checking. */
1050 
1051   a    = (Mat_MPIDense*)mat->data;
1052   ierr = MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);CHKERRQ(ierr);
1053   PetscLogObjectParent(mat,a->A);
1054   PetscFunctionReturn(0);
1055 }
1056 
1057 #undef __FUNCT__
1058 #define __FUNCT__ "MatCreateMPIDense"
1059 /*@C
1060    MatCreateMPIDense - Creates a parallel matrix in dense format.
1061 
1062    Collective on MPI_Comm
1063 
1064    Input Parameters:
1065 +  comm - MPI communicator
1066 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1067 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1068 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1069 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1070 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1071    to control all matrix memory allocation.
1072 
1073    Output Parameter:
1074 .  A - the matrix
1075 
1076    Notes:
1077    The dense format is fully compatible with standard Fortran 77
1078    storage by columns.
1079 
1080    The data input variable is intended primarily for Fortran programmers
1081    who wish to allocate their own matrix memory space.  Most users should
1082    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1083 
1084    The user MUST specify either the local or global matrix dimensions
1085    (possibly both).
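
   Example of usage (a minimal illustrative sketch; the 8x8 size and the unit diagonal
   are made up for illustration):

      Mat         A;
      PetscScalar one = 1.0;
      int         ierr,i,rstart,rend;

      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,8,8,PETSC_NULL,&A);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (i=rstart; i<rend; i++) {
        ierr = MatSetValues(A,1,&i,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatDestroy(A);CHKERRQ(ierr);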
1086 
1087    Level: intermediate
1088 
1089 .keywords: matrix,dense, parallel
1090 
1091 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1092 @*/
1093 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A)
1094 {
1095   int ierr,size;
1096 
1097   PetscFunctionBegin;
1098   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1099   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1100   if (size > 1) {
1101     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1102     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1103   } else {
1104     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1105     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1106   }
1107   PetscFunctionReturn(0);
1108 }
1109 
1110 #undef __FUNCT__
1111 #define __FUNCT__ "MatDuplicate_MPIDense"
1112 static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1113 {
1114   Mat          mat;
1115   Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1116   int          ierr;
1117 
1118   PetscFunctionBegin;
1119   *newmat       = 0;
1120   ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
1121   ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr);
1122   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1123   mat->data         = (void*)a;
1124   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1125   mat->factor       = A->factor;
1126   mat->assembled    = PETSC_TRUE;
1127   mat->preallocated = PETSC_TRUE;
1128 
1129   a->rstart       = oldmat->rstart;
1130   a->rend         = oldmat->rend;
1131   a->size         = oldmat->size;
1132   a->rank         = oldmat->rank;
1133   mat->insertmode = NOT_SET_VALUES;
1134   a->nvec         = oldmat->nvec;
1135   a->donotstash   = oldmat->donotstash;
1136   ierr            = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);CHKERRQ(ierr);
1137   PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1138   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
1139   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1140 
1141   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1142   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1143   PetscLogObjectParent(mat,a->A);
1144   *newmat = mat;
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 #include "petscsys.h"
1149 
1150 #undef __FUNCT__
1151 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1152 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
1153 {
1154   int          *rowners,i,size,rank,m,ierr,nz,j;
1155   PetscScalar  *array,*vals,*vals_ptr;
1156   MPI_Status   status;
1157 
1158   PetscFunctionBegin;
1159   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1160   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1161 
1162   /* determine ownership of all rows */
1163   m          = M/size + ((M % size) > rank);
1164   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1165   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1166   rowners[0] = 0;
1167   for (i=2; i<=size; i++) {
1168     rowners[i] += rowners[i-1];
1169   }
1170 
1171   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1172   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1173 
1174   if (!rank) {
1175     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1176 
1177     /* read in my part of the matrix numerical values  */
1178     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1179 
1180     /* insert into the matrix by row (this is why we cannot read directly into array) */
1181     vals_ptr = vals;
1182     for (i=0; i<m; i++) {
1183       for (j=0; j<N; j++) {
1184         array[i + j*m] = *vals_ptr++;
1185       }
1186     }
1187 
1188     /* read in the other processors' parts and ship them out */
1189     for (i=1; i<size; i++) {
1190       nz   = (rowners[i+1] - rowners[i])*N;
1191       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1192       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1193     }
1194   } else {
1195     /* receive numeric values */
1196     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1197 
1198     /* receive message of values*/
1199     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1200 
1201     /* insert into the matrix by row (this is why we cannot read directly into array) */
1202     vals_ptr = vals;
1203     for (i=0; i<m; i++) {
1204       for (j=0; j<N; j++) {
1205         array[i + j*m] = *vals_ptr++;
1206       }
1207     }
1208   }
1209   ierr = PetscFree(rowners);CHKERRQ(ierr);
1210   ierr = PetscFree(vals);CHKERRQ(ierr);
1211   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1212   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1213   PetscFunctionReturn(0);
1214 }
1215 
1216 EXTERN_C_BEGIN
1217 #undef __FUNCT__
1218 #define __FUNCT__ "MatLoad_MPIDense"
1219 int MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
1220 {
1221   Mat          A;
1222   PetscScalar  *vals,*svals;
1223   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1224   MPI_Status   status;
1225   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1226   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1227   int          tag = ((PetscObject)viewer)->tag;
1228   int          i,nz,ierr,j,rstart,rend,fd;
1229 
1230   PetscFunctionBegin;
1231   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1232   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1233   if (!rank) {
1234     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1235     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1236     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1237   }
1238 
1239   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1240   M = header[1]; N = header[2]; nz = header[3];
1241 
1242   /*
1243        Handle case where matrix is stored on disk as a dense matrix
1244   */
1245   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1246     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1247     PetscFunctionReturn(0);
1248   }
1249 
1250   /* determine ownership of all rows */
1251   m          = M/size + ((M % size) > rank);
1252   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1253   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1254   rowners[0] = 0;
1255   for (i=2; i<=size; i++) {
1256     rowners[i] += rowners[i-1];
1257   }
1258   rstart = rowners[rank];
1259   rend   = rowners[rank+1];
1260 
1261   /* distribute row lengths to all processors */
1262   ierr    = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);CHKERRQ(ierr);
1263   offlens = ourlens + (rend-rstart);
1264   if (!rank) {
1265     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1266     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1267     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1268     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1269     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1270     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1271   } else {
1272     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1273   }
1274 
1275   if (!rank) {
1276     /* calculate the number of nonzeros on each processor */
1277     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1278     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1279     for (i=0; i<size; i++) {
1280       for (j=rowners[i]; j< rowners[i+1]; j++) {
1281         procsnz[i] += rowlengths[j];
1282       }
1283     }
1284     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1285 
1286     /* determine max buffer needed and allocate it */
1287     maxnz = 0;
1288     for (i=0; i<size; i++) {
1289       maxnz = PetscMax(maxnz,procsnz[i]);
1290     }
1291     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1292 
1293     /* read in my part of the matrix column indices  */
1294     nz = procsnz[0];
1295     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1296     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1297 
1298     /* read in everyone else's rows and ship them off */
1299     for (i=1; i<size; i++) {
1300       nz   = procsnz[i];
1301       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1302       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1303     }
1304     ierr = PetscFree(cols);CHKERRQ(ierr);
1305   } else {
1306     /* determine buffer space needed for message */
1307     nz = 0;
1308     for (i=0; i<m; i++) {
1309       nz += ourlens[i];
1310     }
1311     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1312 
1313     /* receive message of column indices*/
1314     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1315     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1316     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1317   }
1318 
1319   /* loop over local rows, determining number of off diagonal entries */
1320   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1321   jj = 0;
1322   for (i=0; i<m; i++) {
1323     for (j=0; j<ourlens[i]; j++) {
1324       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1325       jj++;
1326     }
1327   }
1328 
1329   /* create our matrix */
1330   for (i=0; i<m; i++) {
1331     ourlens[i] -= offlens[i];
1332   }
1333   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1334   A = *newmat;
1335   for (i=0; i<m; i++) {
1336     ourlens[i] += offlens[i];
1337   }
1338 
1339   if (!rank) {
1340     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1341 
1342     /* read in my part of the matrix numerical values  */
1343     nz = procsnz[0];
1344     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1345 
1346     /* insert into matrix */
1347     jj      = rstart;
1348     smycols = mycols;
1349     svals   = vals;
1350     for (i=0; i<m; i++) {
1351       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1352       smycols += ourlens[i];
1353       svals   += ourlens[i];
1354       jj++;
1355     }
1356 
1357     /* read in the other processors' parts and ship them out */
1358     for (i=1; i<size; i++) {
1359       nz   = procsnz[i];
1360       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1361       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1362     }
1363     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1364   } else {
1365     /* receive numeric values */
1366     ierr = PetscMalloc(nz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1367 
1368     /* receive message of values*/
1369     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1370     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1371     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1372 
1373     /* insert into matrix */
1374     jj      = rstart;
1375     smycols = mycols;
1376     svals   = vals;
1377     for (i=0; i<m; i++) {
1378       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1379       smycols += ourlens[i];
1380       svals   += ourlens[i];
1381       jj++;
1382     }
1383   }
1384   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1385   ierr = PetscFree(vals);CHKERRQ(ierr);
1386   ierr = PetscFree(mycols);CHKERRQ(ierr);
1387   ierr = PetscFree(rowners);CHKERRQ(ierr);
1388 
1389   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1390   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1391   PetscFunctionReturn(0);
1392 }
1393 EXTERN_C_END
1394 
1395 
1396 
1397 
1398 
1399