1 /*$Id: mpidense.c,v 1.159 2001/08/10 03:30:41 bsmith Exp $*/
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 #include "src/mat/impls/dense/mpi/mpidense.h"
8 #include "src/vec/vecimpl.h"
9 
10 EXTERN_C_BEGIN
11 #undef __FUNCT__
12 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
13 int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
14 {
15   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
16   int          m = A->m,rstart = mdn->rstart,ierr;
17   PetscScalar  *array;
18   MPI_Comm     comm;
19 
20   PetscFunctionBegin;
21   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
22 
23   /* The reuse aspect is not implemented efficiently */
24   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
25 
26   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
27   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
28   ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
29   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
30   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
31   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
32 
33   *iscopy = PETSC_TRUE;
34   PetscFunctionReturn(0);
35 }
36 EXTERN_C_END
37 
38 EXTERN int MatSetUpMultiply_MPIDense(Mat);
39 
40 #undef __FUNCT__
41 #define __FUNCT__ "MatSetValues_MPIDense"
42 int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v,InsertMode addv)
43 {
44   Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
45   int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
46   PetscTruth   roworiented = A->roworiented;
47 
48   PetscFunctionBegin;
49   for (i=0; i<m; i++) {
50     if (idxm[i] < 0) continue;
51     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
52     if (idxm[i] >= rstart && idxm[i] < rend) {
53       row = idxm[i] - rstart;
54       if (roworiented) {
55         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
56       } else {
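        /* column-oriented input: v holds the m-by-n block column by column, so the value
           for the i-th row index and j-th column index is v[i + j*m] */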
57         for (j=0; j<n; j++) {
58           if (idxn[j] < 0) continue;
59           if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
60           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
61         }
62       }
63     } else {
64       if (!A->donotstash) {
65         if (roworiented) {
66           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
67         } else {
68           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
69         }
70       }
71     }
72   }
73   PetscFunctionReturn(0);
74 }
75 
76 #undef __FUNCT__
77 #define __FUNCT__ "MatGetValues_MPIDense"
78 int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
79 {
80   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
81   int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;
82 
83   PetscFunctionBegin;
84   for (i=0; i<m; i++) {
85     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
86     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
87     if (idxm[i] >= rstart && idxm[i] < rend) {
88       row = idxm[i] - rstart;
89       for (j=0; j<n; j++) {
90         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
91         if (idxn[j] >= mat->N) {
92           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
93         }
94         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
95       }
96     } else {
97       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
98     }
99   }
100   PetscFunctionReturn(0);
101 }
102 
103 #undef __FUNCT__
104 #define __FUNCT__ "MatGetArray_MPIDense"
105 int MatGetArray_MPIDense(Mat A,PetscScalar **array)
106 {
107   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
108   int          ierr;
109 
110   PetscFunctionBegin;
111   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
112   PetscFunctionReturn(0);
113 }
114 
115 #undef __FUNCT__
116 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
117 static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
118 {
119   Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
120   Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
121   int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
122   PetscScalar  *av,*bv,*v = lmat->v;
123   Mat          newmat;
124 
125   PetscFunctionBegin;
126   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
127   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
128   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
129   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
130 
131   /* No parallel redistribution currently supported! Should really check each index set
132      to confirm that it is OK.  ... Currently only supports submatrices with the same row
133      partitioning as the original matrix! */
134 
135   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
136   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
137 
138   /* Check submatrix call */
139   if (scall == MAT_REUSE_MATRIX) {
140     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
141     /* Really need to check the row and column sizes! */
142     newmat = *B;
143   } else {
144     /* Create and fill new matrix */
145     ierr = MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);CHKERRQ(ierr);
146   }
147 
148   /* Now extract the data pointers and do the copy, column at a time */
149   newmatd = (Mat_MPIDense*)newmat->data;
150   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
151 
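  /* both local arrays are stored column-major: column icol[i] of the original local block
     starts at v + nlrows*icol[i], and the requested rows (shifted to local numbering by
     rstart) are gathered into the new matrix one column at a time */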
152   for (i=0; i<ncols; i++) {
153     av = v + nlrows*icol[i];
154     for (j=0; j<nrows; j++) {
155       *bv++ = av[irow[j] - rstart];
156     }
157   }
158 
159   /* Assemble the matrices so that the correct flags are set */
160   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
161   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
162 
163   /* Free work space */
164   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
165   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
166   *B = newmat;
167   PetscFunctionReturn(0);
168 }
169 
170 #undef __FUNCT__
171 #define __FUNCT__ "MatRestoreArray_MPIDense"
172 int MatRestoreArray_MPIDense(Mat A,PetscScalar **array)
173 {
174   PetscFunctionBegin;
175   PetscFunctionReturn(0);
176 }
177 
178 #undef __FUNCT__
179 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
180 int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
181 {
182   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
183   MPI_Comm     comm = mat->comm;
184   int          ierr,nstash,reallocs;
185   InsertMode   addv;
186 
187   PetscFunctionBegin;
188   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
189   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
190   if (addv == (ADD_VALUES|INSERT_VALUES)) {
191     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
192   }
193   mat->insertmode = addv; /* in case this processor had no cache */
194 
195   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
196   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
197   PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
198   PetscFunctionReturn(0);
199 }
200 
201 #undef __FUNCT__
202 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
203 int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
204 {
205   Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
206   int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
207   PetscScalar  *val;
208   InsertMode   addv=mat->insertmode;
209 
210   PetscFunctionBegin;
211   /*  wait on receives */
212   while (1) {
213     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
214     if (!flg) break;
215 
216     for (i=0; i<n;) {
217       /* Now identify the consecutive vals belonging to the same row */
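      /* e.g. row = {4,4,4,7,7}: the first pass stops at j = 3 with ncols = 3, so the three
         entries of row 4 go into a single MatSetValues_MPIDense() call; the next pass then
         handles the two entries of row 7 */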
218       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
219       if (j < n) ncols = j-i;
220       else       ncols = n-i;
221       /* Now assemble all these values with a single function call */
222       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
223       i = j;
224     }
225   }
226   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
227 
228   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
229   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
230 
231   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
232     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
233   }
234   PetscFunctionReturn(0);
235 }
236 
237 #undef __FUNCT__
238 #define __FUNCT__ "MatZeroEntries_MPIDense"
239 int MatZeroEntries_MPIDense(Mat A)
240 {
241   int          ierr;
242   Mat_MPIDense *l = (Mat_MPIDense*)A->data;
243 
244   PetscFunctionBegin;
245   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
246   PetscFunctionReturn(0);
247 }
248 
249 #undef __FUNCT__
250 #define __FUNCT__ "MatGetBlockSize_MPIDense"
251 int MatGetBlockSize_MPIDense(Mat A,int *bs)
252 {
253   PetscFunctionBegin;
254   *bs = 1;
255   PetscFunctionReturn(0);
256 }
257 
258 /* the code does not handle the diagonal entries correctly unless the
259    matrix is square and the column and row ownerships are identical.
260    This is a BUG. The only way to fix it seems to be to access
261    the local matrix (mdn->A) directly rather than going through the
262    MatZeroRows() routine.
263 */
264 #undef __FUNCT__
265 #define __FUNCT__ "MatZeroRows_MPIDense"
266 int MatZeroRows_MPIDense(Mat A,IS is,PetscScalar *diag)
267 {
268   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
269   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
270   int            *procs,*nprocs,j,idx,nsends,*work;
271   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
272   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
273   int            *lens,imdex,*lrows,*values;
274   MPI_Comm       comm = A->comm;
275   MPI_Request    *send_waits,*recv_waits;
276   MPI_Status     recv_status,*send_status;
277   IS             istmp;
278   PetscTruth     found;
279 
280   PetscFunctionBegin;
281   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
282   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
283 
284   /*  first count number of contributors to each processor */
285   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
286   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
287   procs = nprocs + size;
288   ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
289   for (i=0; i<N; i++) {
290     idx = rows[i];
291     found = PETSC_FALSE;
292     for (j=0; j<size; j++) {
293       if (idx >= owners[j] && idx < owners[j+1]) {
294         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
295       }
296     }
297     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
298   }
299   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
300 
301   /* inform other processors of the number of messages and the maximum length */
302   ierr   = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
303   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
304   nmax   = work[rank];
305   nrecvs = work[size+rank];
306   ierr   = PetscFree(work);CHKERRQ(ierr);
307 
308   /* post receives:   */
309   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
310   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
311   for (i=0; i<nrecvs; i++) {
312     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
313   }
314 
315   /* do sends:
316       1) starts[i] gives the starting index in svalues for the row indices going to
317          the ith processor
318   */
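  /* worked example: with 3 processes and nprocs = {2,0,3}, starts becomes {0,2,2}, so the
     two rows destined for process 0 are packed into svalues[0..1] and the three rows for
     process 2 into svalues[2..4] */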
319   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
320   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
321   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
322   starts[0]  = 0;
323   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
324   for (i=0; i<N; i++) {
325     svalues[starts[owner[i]]++] = rows[i];
326   }
327   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
328 
329   starts[0] = 0;
330   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
331   count = 0;
332   for (i=0; i<size; i++) {
333     if (procs[i]) {
334       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
335     }
336   }
337   ierr = PetscFree(starts);CHKERRQ(ierr);
338 
339   base = owners[rank];
340 
341   /*  wait on receives */
342   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
343   source = lens + nrecvs;
344   count  = nrecvs; slen = 0;
345   while (count) {
346     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
347     /* unpack receives into our local space */
348     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
349     source[imdex]  = recv_status.MPI_SOURCE;
350     lens[imdex]  = n;
351     slen += n;
352     count--;
353   }
354   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
355 
356   /* unpack the received global row indices, converting them to local numbering */
357   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
358   count = 0;
359   for (i=0; i<nrecvs; i++) {
360     values = rvalues + i*nmax;
361     for (j=0; j<lens[i]; j++) {
362       lrows[count++] = values[j] - base;
363     }
364   }
365   ierr = PetscFree(rvalues);CHKERRQ(ierr);
366   ierr = PetscFree(lens);CHKERRQ(ierr);
367   ierr = PetscFree(owner);CHKERRQ(ierr);
368   ierr = PetscFree(nprocs);CHKERRQ(ierr);
369 
370   /* actually zap the local rows */
371   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
372   PetscLogObjectParent(A,istmp);
373   ierr = PetscFree(lrows);CHKERRQ(ierr);
374   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
375   ierr = ISDestroy(istmp);CHKERRQ(ierr);
376 
377   /* wait on sends */
378   if (nsends) {
379     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
380     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
381     ierr = PetscFree(send_status);CHKERRQ(ierr);
382   }
383   ierr = PetscFree(send_waits);CHKERRQ(ierr);
384   ierr = PetscFree(svalues);CHKERRQ(ierr);
385 
386   PetscFunctionReturn(0);
387 }
388 
389 #undef __FUNCT__
390 #define __FUNCT__ "MatMult_MPIDense"
391 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
392 {
393   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
394   int          ierr;
395 
396   PetscFunctionBegin;
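  /* the local block mdn->A is m x N (each process stores all columns of its rows), so the
     full input vector is first gathered into lvec (length N) and the product then reduces
     to a purely local dense multiply */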
397   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
398   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
399   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
400   PetscFunctionReturn(0);
401 }
402 
403 #undef __FUNCT__
404 #define __FUNCT__ "MatMultAdd_MPIDense"
405 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
406 {
407   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
408   int          ierr;
409 
410   PetscFunctionBegin;
411   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
412   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
413   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
414   PetscFunctionReturn(0);
415 }
416 
417 #undef __FUNCT__
418 #define __FUNCT__ "MatMultTranspose_MPIDense"
419 int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
420 {
421   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
422   int          ierr;
423   PetscScalar  zero = 0.0;
424 
425   PetscFunctionBegin;
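  /* each process forms its partial result (local rows of A)^T * (local part of x) in lvec
     (length N); the reverse scatter with ADD_VALUES then sums these contributions into the
     distributed output vector yy */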
426   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
427   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
428   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
429   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
430   PetscFunctionReturn(0);
431 }
432 
433 #undef __FUNCT__
434 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
435 int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
436 {
437   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
438   int          ierr;
439 
440   PetscFunctionBegin;
441   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
442   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
443   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
444   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
445   PetscFunctionReturn(0);
446 }
447 
448 #undef __FUNCT__
449 #define __FUNCT__ "MatGetDiagonal_MPIDense"
450 int MatGetDiagonal_MPIDense(Mat A,Vec v)
451 {
452   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
453   Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
454   int          ierr,len,i,n,m = A->m,radd;
455   PetscScalar  *x,zero = 0.0;
456 
457   PetscFunctionBegin;
458   ierr = VecSet(&zero,v);CHKERRQ(ierr);
459   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
460   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
461   if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
462   len  = PetscMin(a->A->m,a->A->n);
463   radd = a->rstart*m;
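  /* the local array is column-major with leading dimension m; the diagonal entry of global
     row rstart+i sits in local row i and global column rstart+i, i.e. at offset
     (rstart+i)*m + i = radd + i*m + i */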
464   for (i=0; i<len; i++) {
465     x[i] = aloc->v[radd + i*m + i];
466   }
467   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
468   PetscFunctionReturn(0);
469 }
470 
471 #undef __FUNCT__
472 #define __FUNCT__ "MatDestroy_MPIDense"
473 int MatDestroy_MPIDense(Mat mat)
474 {
475   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
476   int          ierr;
477 
478   PetscFunctionBegin;
479 
480 #if defined(PETSC_USE_LOG)
481   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
482 #endif
483   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
484   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
485   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
486   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
487   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
488   if (mdn->factor) {
489     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
490     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
491     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
492     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
493   }
494   ierr = PetscFree(mdn);CHKERRQ(ierr);
495   PetscFunctionReturn(0);
496 }
497 
498 #undef __FUNCT__
499 #define __FUNCT__ "MatView_MPIDense_Binary"
500 static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
501 {
502   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
503   int          ierr;
504 
505   PetscFunctionBegin;
506   if (mdn->size == 1) {
507     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
508   }
509   else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
510   PetscFunctionReturn(0);
511 }
512 
513 #undef __FUNCT__
514 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
515 static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
516 {
517   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
518   int               ierr,size = mdn->size,rank = mdn->rank;
519   PetscViewerType   vtype;
520   PetscTruth        isascii,isdraw;
521   PetscViewer       sviewer;
522   PetscViewerFormat format;
523 
524   PetscFunctionBegin;
525   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
526   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
527   if (isascii) {
528     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
529     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
530     if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
531       MatInfo info;
532       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
533       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
534                    (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
535       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
536       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
537       PetscFunctionReturn(0);
538     } else if (format == PETSC_VIEWER_ASCII_INFO) {
539       PetscFunctionReturn(0);
540     }
541   } else if (isdraw) {
542     PetscDraw       draw;
543     PetscTruth isnull;
544 
545     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
546     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
547     if (isnull) PetscFunctionReturn(0);
548   }
549 
550   if (size == 1) {
551     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
552   } else {
553     /* assemble the entire matrix onto the first processor. */
554     Mat          A;
555     int          M = mat->M,N = mat->N,m,row,i,nz,*cols;
556     PetscScalar  *vals;
557 
558     if (!rank) {
559       ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
560     } else {
561       ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
562     }
563     PetscLogObjectParent(mat,A);
564 
565     /* Copy the matrix ... This isn't the most efficient means,
566        but it's quick for now */
567     row = mdn->rstart; m = mdn->A->m;
568     for (i=0; i<m; i++) {
569       ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
570       ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
571       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
572       row++;
573     }
574 
575     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
576     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
577     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
578     if (!rank) {
579       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
580     }
581     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
582     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
583     ierr = MatDestroy(A);CHKERRQ(ierr);
584   }
585   PetscFunctionReturn(0);
586 }
587 
588 #undef __FUNCT__
589 #define __FUNCT__ "MatView_MPIDense"
590 int MatView_MPIDense(Mat mat,PetscViewer viewer)
591 {
592   int        ierr;
593   PetscTruth isascii,isbinary,isdraw,issocket;
594 
595   PetscFunctionBegin;
596 
597   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
598   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
599   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
600   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
601 
602   if (isascii || issocket || isdraw) {
603     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
604   } else if (isbinary) {
605     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
606   } else {
607     SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
608   }
609   PetscFunctionReturn(0);
610 }
611 
612 #undef __FUNCT__
613 #define __FUNCT__ "MatGetInfo_MPIDense"
614 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
615 {
616   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
617   Mat          mdn = mat->A;
618   int          ierr;
619   PetscReal    isend[5],irecv[5];
620 
621   PetscFunctionBegin;
622   info->rows_global    = (double)A->M;
623   info->columns_global = (double)A->N;
624   info->rows_local     = (double)A->m;
625   info->columns_local  = (double)A->N;
626   info->block_size     = 1.0;
627   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
628   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
629   isend[3] = info->memory;  isend[4] = info->mallocs;
630   if (flag == MAT_LOCAL) {
631     info->nz_used      = isend[0];
632     info->nz_allocated = isend[1];
633     info->nz_unneeded  = isend[2];
634     info->memory       = isend[3];
635     info->mallocs      = isend[4];
636   } else if (flag == MAT_GLOBAL_MAX) {
637     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
638     info->nz_used      = irecv[0];
639     info->nz_allocated = irecv[1];
640     info->nz_unneeded  = irecv[2];
641     info->memory       = irecv[3];
642     info->mallocs      = irecv[4];
643   } else if (flag == MAT_GLOBAL_SUM) {
644     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
645     info->nz_used      = irecv[0];
646     info->nz_allocated = irecv[1];
647     info->nz_unneeded  = irecv[2];
648     info->memory       = irecv[3];
649     info->mallocs      = irecv[4];
650   }
651   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
652   info->fill_ratio_needed = 0;
653   info->factor_mallocs    = 0;
654   PetscFunctionReturn(0);
655 }
656 
657 #undef __FUNCT__
658 #define __FUNCT__ "MatSetOption_MPIDense"
659 int MatSetOption_MPIDense(Mat A,MatOption op)
660 {
661   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
662   int          ierr;
663 
664   PetscFunctionBegin;
665   switch (op) {
666   case MAT_NO_NEW_NONZERO_LOCATIONS:
667   case MAT_YES_NEW_NONZERO_LOCATIONS:
668   case MAT_NEW_NONZERO_LOCATION_ERR:
669   case MAT_NEW_NONZERO_ALLOCATION_ERR:
670   case MAT_COLUMNS_SORTED:
671   case MAT_COLUMNS_UNSORTED:
672     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
673     break;
674   case MAT_ROW_ORIENTED:
675     a->roworiented = PETSC_TRUE;
676     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
677     break;
678   case MAT_ROWS_SORTED:
679   case MAT_ROWS_UNSORTED:
680   case MAT_YES_NEW_DIAGONALS:
681   case MAT_USE_HASH_TABLE:
682   case MAT_USE_SINGLE_PRECISION_SOLVES:
683     PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
684     break;
685   case MAT_COLUMN_ORIENTED:
686     a->roworiented = PETSC_FALSE;
687     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
688     break;
689   case MAT_IGNORE_OFF_PROC_ENTRIES:
690     a->donotstash = PETSC_TRUE;
691     break;
692   case MAT_NO_NEW_DIAGONALS:
693     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
694     break;
695   default:
696     SETERRQ(PETSC_ERR_SUP,"unknown option");
697     break;
698   }
699   PetscFunctionReturn(0);
700 }
701 
702 #undef __FUNCT__
703 #define __FUNCT__ "MatGetOwnershipRange_MPIDense"
704 int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
705 {
706   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
707 
708   PetscFunctionBegin;
709   if (m) *m = mat->rstart;
710   if (n) *n = mat->rend;
711   PetscFunctionReturn(0);
712 }
713 
714 #undef __FUNCT__
715 #define __FUNCT__ "MatGetRow_MPIDense"
716 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v)
717 {
718   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
719   int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;
720 
721   PetscFunctionBegin;
722   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
723   lrow = row - rstart;
724   ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
725   PetscFunctionReturn(0);
726 }
727 
728 #undef __FUNCT__
729 #define __FUNCT__ "MatRestoreRow_MPIDense"
730 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
731 {
732   int ierr;
733 
734   PetscFunctionBegin;
735   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
736   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatDiagonalScale_MPIDense"
742 int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
743 {
744   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
745   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
746   PetscScalar  *l,*r,x,*v;
747   int          ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;
748 
749   PetscFunctionBegin;
750   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
751   if (ll) {
752     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
753     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
754     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
755     for (i=0; i<m; i++) {
756       x = l[i];
757       v = mat->v + i;
758       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
759     }
760     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
761     PetscLogFlops(n*m);
762   }
763   if (rr) {
764     ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
765     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
766     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
767     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
768     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
769     for (i=0; i<n; i++) {
770       x = r[i];
771       v = mat->v + i*m;
772       for (j=0; j<m; j++) { (*v++) *= x;}
773     }
774     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
775     PetscLogFlops(n*m);
776   }
777   PetscFunctionReturn(0);
778 }
779 
780 #undef __FUNCT__
781 #define __FUNCT__ "MatNorm_MPIDense"
782 int MatNorm_MPIDense(Mat A,NormType type,PetscReal *norm)
783 {
784   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
785   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
786   int          ierr,i,j;
787   PetscReal    sum = 0.0;
788   PetscScalar  *v = mat->v;
789 
790   PetscFunctionBegin;
791   if (mdn->size == 1) {
792     ierr =  MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
793   } else {
794     if (type == NORM_FROBENIUS) {
795       for (i=0; i<mdn->A->n*mdn->A->m; i++) {
796 #if defined(PETSC_USE_COMPLEX)
797         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
798 #else
799         sum += (*v)*(*v); v++;
800 #endif
801       }
802       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
803       *norm = sqrt(*norm);
804       PetscLogFlops(2*mdn->A->n*mdn->A->m);
805     } else if (type == NORM_1) {
806       PetscReal *tmp,*tmp2;
807       ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
808       tmp2 = tmp + A->N;
809       ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
810       *norm = 0.0;
811       v = mat->v;
812       for (j=0; j<mdn->A->n; j++) {
813         for (i=0; i<mdn->A->m; i++) {
814           tmp[j] += PetscAbsScalar(*v);  v++;
815         }
816       }
817       ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
818       for (j=0; j<A->N; j++) {
819         if (tmp2[j] > *norm) *norm = tmp2[j];
820       }
821       ierr = PetscFree(tmp);CHKERRQ(ierr);
822       PetscLogFlops(A->n*A->m);
823     } else if (type == NORM_INFINITY) { /* max row norm */
824       PetscReal ntemp;
825       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
826       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
827     } else {
828       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
829     }
830   }
831   PetscFunctionReturn(0);
832 }
833 
834 #undef __FUNCT__
835 #define __FUNCT__ "MatTranspose_MPIDense"
836 int MatTranspose_MPIDense(Mat A,Mat *matout)
837 {
838   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
839   Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
840   Mat          B;
841   int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
842   int          j,i,ierr;
843   PetscScalar  *v;
844 
845   PetscFunctionBegin;
846   if (!matout && M != N) {
847     SETERRQ(PETSC_ERR_SUP,"In-place transpose is supported only for square matrices");
848   }
849   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);
850 
851   m = a->A->m; n = a->A->n; v = Aloc->v;
852   ierr = PetscMalloc(m*sizeof(int),&rwork);CHKERRQ(ierr);
853   for (j=0; j<n; j++) {
854     for (i=0; i<m; i++) rwork[i] = rstart + i;
855     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
856     v   += m;
857   }
858   ierr = PetscFree(rwork);CHKERRQ(ierr);
859   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
860   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
861   if (matout) {
862     *matout = B;
863   } else {
864     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
865   }
866   PetscFunctionReturn(0);
867 }
868 
869 #include "petscblaslapack.h"
870 #undef __FUNCT__
871 #define __FUNCT__ "MatScale_MPIDense"
872 int MatScale_MPIDense(PetscScalar *alpha,Mat inA)
873 {
874   Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
875   Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
876   int          one = 1,nz;
877 
878   PetscFunctionBegin;
879   nz = inA->m*inA->N;
880   BLscal_(&nz,alpha,a->v,&one);
881   PetscLogFlops(nz);
882   PetscFunctionReturn(0);
883 }
884 
885 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
886 EXTERN int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);
887 
888 #undef __FUNCT__
889 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
890 int MatSetUpPreallocation_MPIDense(Mat A)
891 {
892   int        ierr;
893 
894   PetscFunctionBegin;
895   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
896   PetscFunctionReturn(0);
897 }
898 
899 /* -------------------------------------------------------------------*/
900 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
901        MatGetRow_MPIDense,
902        MatRestoreRow_MPIDense,
903        MatMult_MPIDense,
904        MatMultAdd_MPIDense,
905        MatMultTranspose_MPIDense,
906        MatMultTransposeAdd_MPIDense,
907        0,
908        0,
909        0,
910        0,
911        0,
912        0,
913        0,
914        MatTranspose_MPIDense,
915        MatGetInfo_MPIDense,0,
916        MatGetDiagonal_MPIDense,
917        MatDiagonalScale_MPIDense,
918        MatNorm_MPIDense,
919        MatAssemblyBegin_MPIDense,
920        MatAssemblyEnd_MPIDense,
921        0,
922        MatSetOption_MPIDense,
923        MatZeroEntries_MPIDense,
924        MatZeroRows_MPIDense,
925        0,
926        0,
927        0,
928        0,
929        MatSetUpPreallocation_MPIDense,
930        0,
931        MatGetOwnershipRange_MPIDense,
932        0,
933        0,
934        MatGetArray_MPIDense,
935        MatRestoreArray_MPIDense,
936        MatDuplicate_MPIDense,
937        0,
938        0,
939        0,
940        0,
941        0,
942        MatGetSubMatrices_MPIDense,
943        0,
944        MatGetValues_MPIDense,
945        0,
946        0,
947        MatScale_MPIDense,
948        0,
949        0,
950        0,
951        MatGetBlockSize_MPIDense,
952        0,
953        0,
954        0,
955        0,
956        0,
957        0,
958        0,
959        0,
960        0,
961        MatGetSubMatrix_MPIDense,
962        MatDestroy_MPIDense,
963        MatView_MPIDense,
964        MatGetPetscMaps_Petsc};
965 
966 EXTERN_C_BEGIN
967 #undef __FUNCT__
968 #define __FUNCT__ "MatCreate_MPIDense"
969 int MatCreate_MPIDense(Mat mat)
970 {
971   Mat_MPIDense *a;
972   int          ierr,i;
973 
974   PetscFunctionBegin;
975   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
976   mat->data         = (void*)a;
977   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
978   mat->factor       = 0;
979   mat->mapping      = 0;
980 
981   a->factor       = 0;
982   mat->insertmode = NOT_SET_VALUES;
983   ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
984   ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);
985 
986   ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
987   ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
988   a->nvec = mat->n;
989 
990   /* the information in the maps duplicates the ownership information computed below;
991      eventually the redundant copy that is not contained in the maps should be removed */
992   ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
993   ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);
994 
995   /* build local table of row and column ownerships */
996   ierr       = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
997   a->cowners = a->rowners + a->size + 1;
998   PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
999   ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
1000   a->rowners[0] = 0;
1001   for (i=2; i<=a->size; i++) {
1002     a->rowners[i] += a->rowners[i-1];
1003   }
1004   a->rstart = a->rowners[a->rank];
1005   a->rend   = a->rowners[a->rank+1];
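  /* worked example: with 3 processes and local row counts {3,2,4}, the Allgather fills
     rowners[1..3] = {3,2,4}; after the running sum rowners = {0,3,5,9}, so process 1 owns
     global rows [3,5) */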
1006   ierr      = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
1007   a->cowners[0] = 0;
1008   for (i=2; i<=a->size; i++) {
1009     a->cowners[i] += a->cowners[i-1];
1010   }
1011 
1012   /* build cache (stash) for off-processor entries generated during MatSetValues() */
1013   a->donotstash = PETSC_FALSE;
1014   ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);
1015 
1016   /* data used for the matrix-vector multiply */
1017   a->lvec        = 0;
1018   a->Mvctx       = 0;
1019   a->roworiented = PETSC_TRUE;
1020 
1021   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1022                                      "MatGetDiagonalBlock_MPIDense",
1023                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1024   PetscFunctionReturn(0);
1025 }
1026 EXTERN_C_END
1027 
1028 #undef __FUNCT__
1029 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1030 /*@C
1031    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1032 
1033    Not collective
1034 
1035    Input Parameters:
1036 .  A - the matrix
1037 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1038    to control all matrix memory allocation.
1039 
1040    Notes:
1041    The dense format is fully compatible with standard Fortran 77
1042    storage by columns.
1043 
1044    The data input variable is intended primarily for Fortran programmers
1045    who wish to allocate their own matrix memory space.  Most users should
1046    set data=PETSC_NULL.
1047 
1048    Level: intermediate
1049 
1050 .keywords: matrix,dense, parallel
1051 
1052 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1053 @*/
1054 int MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1055 {
1056   Mat_MPIDense *a;
1057   int          ierr;
1058   PetscTruth   flg2;
1059 
1060   PetscFunctionBegin;
1061   ierr = PetscTypeCompare((PetscObject)mat,MATMPIDENSE,&flg2);CHKERRQ(ierr);
1062   if (!flg2) PetscFunctionReturn(0);
1063   mat->preallocated = PETSC_TRUE;
1064   /* Note:  For now, when data is specified above, this assumes the user correctly
1065    allocates the local dense storage space.  We should add error checking. */
1066 
1067   a    = (Mat_MPIDense*)mat->data;
1068   ierr = MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);CHKERRQ(ierr);
1069   PetscLogObjectParent(mat,a->A);
1070   PetscFunctionReturn(0);
1071 }
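
/*
   Illustrative usage sketch (not part of the original source); M and N are placeholder
   global sizes chosen by the caller, and PETSc allocates the local storage:

      Mat A;
      int ierr;
      ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
*/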
1072 
1073 #undef __FUNCT__
1074 #define __FUNCT__ "MatCreateMPIDense"
1075 /*@C
1076    MatCreateMPIDense - Creates a parallel matrix in dense format.
1077 
1078    Collective on MPI_Comm
1079 
1080    Input Parameters:
1081 +  comm - MPI communicator
1082 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1083 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1084 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1085 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1086 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1087    to control all matrix memory allocation.
1088 
1089    Output Parameter:
1090 .  A - the matrix
1091 
1092    Notes:
1093    The dense format is fully compatible with standard Fortran 77
1094    storage by columns.
1095 
1096    The data input variable is intended primarily for Fortran programmers
1097    who wish to allocate their own matrix memory space.  Most users should
1098    set data=PETSC_NULL.
1099 
1100    The user MUST specify either the local or global matrix dimensions
1101    (possibly both).
1102 
1103    Level: intermediate
1104 
1105 .keywords: matrix,dense, parallel
1106 
1107 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1108 @*/
1109 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A)
1110 {
1111   int ierr,size;
1112 
1113   PetscFunctionBegin;
1114   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1115   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1116   if (size > 1) {
1117     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1118     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1119   } else {
1120     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1121     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1122   }
1123   PetscFunctionReturn(0);
1124 }
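
/*
   Illustrative usage sketch (not part of the original source): create a 100 x 100 parallel
   dense matrix, let PETSc choose the row/column split and manage the storage, set one
   entry, and assemble:

      Mat         A;
      int         ierr,row = 0,col = 0;
      PetscScalar one = 1.0;
      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,PETSC_NULL,&A);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,1,&col,&one,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatDestroy(A);CHKERRQ(ierr);
*/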
1125 
1126 #undef __FUNCT__
1127 #define __FUNCT__ "MatDuplicate_MPIDense"
1128 static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1129 {
1130   Mat          mat;
1131   Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1132   int          ierr;
1133 
1134   PetscFunctionBegin;
1135   *newmat       = 0;
1136   ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
1137   ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr);
1138   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1139   mat->data         = (void*)a;
1140   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1141   mat->factor       = A->factor;
1142   mat->assembled    = PETSC_TRUE;
1143   mat->preallocated = PETSC_TRUE;
1144 
1145   a->rstart       = oldmat->rstart;
1146   a->rend         = oldmat->rend;
1147   a->size         = oldmat->size;
1148   a->rank         = oldmat->rank;
1149   mat->insertmode = NOT_SET_VALUES;
1150   a->nvec         = oldmat->nvec;
1151   a->donotstash   = oldmat->donotstash;
1152   ierr            = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);CHKERRQ(ierr);
1153   PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1154   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
1155   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1156 
1157   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1158   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1159   PetscLogObjectParent(mat,a->A);
1160   *newmat = mat;
1161   PetscFunctionReturn(0);
1162 }
1163 
1164 #include "petscsys.h"
1165 
1166 #undef __FUNCT__
1167 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1168 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
1169 {
1170   int          *rowners,i,size,rank,m,ierr,nz,j;
1171   PetscScalar  *array,*vals,*vals_ptr;
1172   MPI_Status   status;
1173 
1174   PetscFunctionBegin;
1175   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1176   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1177 
1178   /* determine ownership of all rows */
1179   m          = M/size + ((M % size) > rank);
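  /* e.g. M = 10 on 4 processes gives m = 3,3,2,2 for ranks 0 through 3 */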
1180   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1181   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1182   rowners[0] = 0;
1183   for (i=2; i<=size; i++) {
1184     rowners[i] += rowners[i-1];
1185   }
1186 
1187   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1188   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1189 
1190   if (!rank) {
1191     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1192 
1193     /* read in my part of the matrix numerical values  */
1194     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1195 
1196     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1197     vals_ptr = vals;
1198     for (i=0; i<m; i++) {
1199       for (j=0; j<N; j++) {
1200         array[i + j*m] = *vals_ptr++;
1201       }
1202     }
1203 
1204     /* read in other processors and ship out */
1205     for (i=1; i<size; i++) {
1206       nz   = (rowners[i+1] - rowners[i])*N;
1207       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1208       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1209     }
1210   } else {
1211     /* receive numeric values */
1212     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1213 
1214     /* receive message of values*/
1215     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1216 
1217     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1218     vals_ptr = vals;
1219     for (i=0; i<m; i++) {
1220       for (j=0; j<N; j++) {
1221         array[i + j*m] = *vals_ptr++;
1222       }
1223     }
1224   }
1225   ierr = PetscFree(rowners);CHKERRQ(ierr);
1226   ierr = PetscFree(vals);CHKERRQ(ierr);
1227   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1228   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1229   PetscFunctionReturn(0);
1230 }
1231 
1232 EXTERN_C_BEGIN
1233 #undef __FUNCT__
1234 #define __FUNCT__ "MatLoad_MPIDense"
1235 int MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
1236 {
1237   Mat          A;
1238   PetscScalar  *vals,*svals;
1239   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1240   MPI_Status   status;
1241   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1242   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1243   int          tag = ((PetscObject)viewer)->tag;
1244   int          i,nz,ierr,j,rstart,rend,fd;
1245 
1246   PetscFunctionBegin;
1247   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1248   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1249   if (!rank) {
1250     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1251     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1252     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1253   }
1254 
1255   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1256   M = header[1]; N = header[2]; nz = header[3];
1257 
1258   /*
1259        Handle case where matrix is stored on disk as a dense matrix
1260   */
1261   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1262     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1263     PetscFunctionReturn(0);
1264   }
1265 
1266   /* determine ownership of all rows */
1267   m          = M/size + ((M % size) > rank);
1268   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1269   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1270   rowners[0] = 0;
1271   for (i=2; i<=size; i++) {
1272     rowners[i] += rowners[i-1];
1273   }
1274   rstart = rowners[rank];
1275   rend   = rowners[rank+1];
1276 
1277   /* distribute row lengths to all processors */
1278   ierr    = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);CHKERRQ(ierr);
1279   offlens = ourlens + (rend-rstart);
1280   if (!rank) {
1281     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1282     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1283     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1284     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1285     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1286     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1287   } else {
1288     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1289   }
1290 
1291   if (!rank) {
1292     /* calculate the number of nonzeros on each processor */
1293     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1294     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1295     for (i=0; i<size; i++) {
1296       for (j=rowners[i]; j< rowners[i+1]; j++) {
1297         procsnz[i] += rowlengths[j];
1298       }
1299     }
1300     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1301 
1302     /* determine max buffer needed and allocate it */
1303     maxnz = 0;
1304     for (i=0; i<size; i++) {
1305       maxnz = PetscMax(maxnz,procsnz[i]);
1306     }
1307     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1308 
1309     /* read in my part of the matrix column indices  */
1310     nz = procsnz[0];
1311     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1312     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1313 
1314     /* read in everyone else's part and ship it off */
1315     for (i=1; i<size; i++) {
1316       nz   = procsnz[i];
1317       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1318       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1319     }
1320     ierr = PetscFree(cols);CHKERRQ(ierr);
1321   } else {
1322     /* determine buffer space needed for message */
1323     nz = 0;
1324     for (i=0; i<m; i++) {
1325       nz += ourlens[i];
1326     }
1327     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1328 
1329     /* receive message of column indices*/
1330     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1331     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1332     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1333   }
1334 
1335   /* loop over local rows, determining the number of off-diagonal entries */
1336   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1337   jj = 0;
1338   for (i=0; i<m; i++) {
1339     for (j=0; j<ourlens[i]; j++) {
1340       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1341       jj++;
1342     }
1343   }
1344 
1345   /* create our matrix */
1346   for (i=0; i<m; i++) {
1347     ourlens[i] -= offlens[i];
1348   }
1349   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1350   A = *newmat;
1351   for (i=0; i<m; i++) {
1352     ourlens[i] += offlens[i];
1353   }
1354 
1355   if (!rank) {
1356     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1357 
1358     /* read in my part of the matrix numerical values  */
1359     nz = procsnz[0];
1360     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1361 
1362     /* insert into matrix */
1363     jj      = rstart;
1364     smycols = mycols;
1365     svals   = vals;
1366     for (i=0; i<m; i++) {
1367       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1368       smycols += ourlens[i];
1369       svals   += ourlens[i];
1370       jj++;
1371     }
1372 
1373     /* read in other processors and ship out */
1374     for (i=1; i<size; i++) {
1375       nz   = procsnz[i];
1376       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1377       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1378     }
1379     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1380   } else {
1381     /* receive numeric values */
1382     ierr = PetscMalloc(nz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1383 
1384     /* receive message of values*/
1385     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1386     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1387     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1388 
1389     /* insert into matrix */
1390     jj      = rstart;
1391     smycols = mycols;
1392     svals   = vals;
1393     for (i=0; i<m; i++) {
1394       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1395       smycols += ourlens[i];
1396       svals   += ourlens[i];
1397       jj++;
1398     }
1399   }
1400   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1401   ierr = PetscFree(vals);CHKERRQ(ierr);
1402   ierr = PetscFree(mycols);CHKERRQ(ierr);
1403   ierr = PetscFree(rowners);CHKERRQ(ierr);
1404 
1405   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1406   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1407   PetscFunctionReturn(0);
1408 }
1409 EXTERN_C_END
1410 
1411 
1412 
1413 
1414 
1415