/*$Id: mpidense.c,v 1.152 2001/03/23 22:04:53 bsmith Exp balay $*/

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  int          m = A->m,rstart = mdn->rstart,ierr;
  Scalar       *array;
  MPI_Comm     comm;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN int MatSetUpMultiply_MPIDense(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
  PetscTruth   roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
  int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  Scalar       *av,*bv,*v = lmat->v;
  Mat          newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. Currently this supports only a submatrix with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are in the same mode, either INSERT_VALUES or ADD_VALUES */
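  /* Illustrative note: this relies on INSERT_VALUES and ADD_VALUES occupying
     distinct bits, so the bitwise OR across processors below equals
     (ADD_VALUES|INSERT_VALUES) exactly when some processors inserted while
     others added; processors with nothing cached contribute NOT_SET_VALUES,
     which is harmless under OR. */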
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  Scalar       *val;
  InsertMode   addv=mat->insertmode;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs = nprocs + size;
  ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  ierr   = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
  ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  nrecvs = work[size+rank];
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
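  /* For example (illustrative only): with size = 3 and nprocs = {2,0,1}, the
     prefix sum below gives starts = {0,2,2}, so the two rows destined for
     processor 0 are packed into svalues[0..1] and the row for processor 2
     into svalues[2]. */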
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;
  Scalar       zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
  int          ierr,len,i,n,m = A->m,radd;
  Scalar       *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->m,a->A->n);
  radd = a->rstart*m;
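  /* aloc->v is the local m-by-N array in column-major (Fortran) order, so the
     diagonal entry of local row i lives in global column a->rstart+i, at offset
     m*(a->rstart+i) + i = radd + i*m + i */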
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;

#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
  if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  }
  else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  int               ierr,size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscTruth        isascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
                   (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor */
    Mat          A;
    int          M = mat->M,N = mat->N,m,row,i,nz,*cols;
    Scalar       *vals;

    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = mdn->A->m;
    for (i=0; i<m; i++) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (isascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  Mat          mdn = mat->A;
  int          ierr;
  PetscReal    isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->M;
  info->columns_global = (double)A->N;
  info->rows_local     = (double)A->m;
  info->columns_local  = (double)A->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_NEW_NONZERO_LOCATION_ERR ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_COLUMNS_UNSORTED) {
        ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
  } else if (op == MAT_ROW_ORIENTED) {
        a->roworiented = PETSC_TRUE;
        ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_YES_NEW_DIAGONALS ||
             op == MAT_USE_HASH_TABLE) {
    PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = PETSC_TRUE;
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetOwnershipRange_MPIDense"
int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  if (m) *m = mat->rstart;
  if (n) *n = mat->rend;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  int ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  Scalar       *l,*r,x,*v;
  int          ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    PetscLogFlops(n*m);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector non-conforming local size");
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    PetscLogFlops(n*m);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,PetscReal *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  int          ierr,i,j;
  PetscReal    sum = 0.0;
  Scalar       *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->n*mdn->A->m; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
      PetscLogFlops(2*mdn->A->n*mdn->A->m);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->N;
      ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = mat->v;
      for (j=0; j<mdn->A->n; j++) {
        for (i=0; i<mdn->A->m; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      for (j=0; j<A->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      PetscLogFlops(A->n*A->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
  Mat          B;
  int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
  int          j,i,ierr;
  Scalar       *v;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = a->A->m; n = a->A->n; v = Aloc->v;
  /* rwork holds the m global row indices of this processor; it must hold m
     entries (not n), and it is invariant over the column loop below */
  ierr = PetscMalloc(m*sizeof(int),&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIDense"
int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
  int          one = 1,nz;

  PetscFunctionBegin;
  nz = inA->m*inA->N;
  BLscal_(&nz,alpha,a->v,&one);
  PetscLogFlops(nz);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
EXTERN int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
int MatSetUpPreallocation_MPIDense(Mat A)
{
  int        ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
       MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
       MatGetInfo_MPIDense,
       0,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
       MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
       MatSetUpPreallocation_MPIDense,
       0,
       MatGetOwnershipRange_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
       MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
       0,
       MatScale_MPIDense,
       0,
       0,
       0,
       MatGetBlockSize_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       MatGetMaps_Petsc};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
int MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;
  int          ierr,i;

  PetscFunctionBegin;
  ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data         = (void*)a;
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = 0;
  mat->mapping      = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
  a->nvec = mat->n;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr       = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
  a->cowners = a->rowners + a->size + 1;
  PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr      = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->cowners[i] += a->cowners[i-1];
  }

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIDense",
                                     MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.
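
   A minimal usage sketch (illustrative only; it assumes the matrix has already
   been created with MatCreate() and given the MATMPIDENSE type):
.vb
      ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
.ve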

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatMPIDenseSetPreallocation(Mat mat,Scalar *data)
{
  Mat_MPIDense *a;
  int          ierr;
  PetscTruth   flg2;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)mat,MATMPIDENSE,&flg2);CHKERRQ(ierr);
  if (!flg2) PetscFunctionReturn(0);
  mat->preallocated = PETSC_TRUE;
  /* Note:  For now, when data is specified above, this assumes the user correctly
   allocates the local dense storage space.  We should add error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).
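
   Example (a minimal sketch, illustrative only): create a matrix with global
   size M by N, letting PETSc choose the local decomposition and manage storage:
.vb
      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
                               M,N,PETSC_NULL,&A);CHKERRQ(ierr);
      /* ... MatSetValues() ... */
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve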

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr);
  ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data         = (void*)a;
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;
  ierr            = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
{
  int        *rowners,i,size,rank,m,ierr,nz,j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(Scalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix by rows (the local array is stored by columns, so we cannot read the file data directly into it) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(Scalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix by rows (the local array is stored by columns, so we cannot read the file data directly into it) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
int MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;
  int          i,nz,ierr,j,rstart,rend,fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(Scalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(Scalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END