1 /*$Id: mpidense.c,v 1.159 2001/08/10 03:30:41 bsmith Exp $*/
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 #include "src/mat/impls/dense/mpi/mpidense.h"
8 #include "src/vec/vecimpl.h"
9 
10 EXTERN_C_BEGIN
11 #undef __FUNCT__
12 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
13 int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
14 {
15   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
16   int          m = A->m,rstart = mdn->rstart,ierr;
17   PetscScalar  *array;
18   MPI_Comm     comm;
19 
20   PetscFunctionBegin;
21   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
22 
23   /* The reuse aspect is not implemented efficiently */
24   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
25 
26   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
27   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
28   ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
29   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
30   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
31   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
32 
33   *iscopy = PETSC_TRUE;
34   PetscFunctionReturn(0);
35 }
36 EXTERN_C_END
37 
38 EXTERN int MatSetUpMultiply_MPIDense(Mat);
39 
40 #undef __FUNCT__
41 #define __FUNCT__ "MatSetValues_MPIDense"
42 int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v,InsertMode addv)
43 {
44   Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
45   int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
46   PetscTruth   roworiented = A->roworiented;
47 
48   PetscFunctionBegin;
49   for (i=0; i<m; i++) {
50     if (idxm[i] < 0) continue;
51     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
52     if (idxm[i] >= rstart && idxm[i] < rend) {
53       row = idxm[i] - rstart;
54       if (roworiented) {
55         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
56       } else {
57         for (j=0; j<n; j++) {
58           if (idxn[j] < 0) continue;
59           if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
60           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
61         }
62       }
63     } else {
64       if (!A->donotstash) {
65         if (roworiented) {
66           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
67         } else {
68           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
69         }
70       }
71     }
72   }
73   PetscFunctionReturn(0);
74 }
75 
76 #undef __FUNCT__
77 #define __FUNCT__ "MatGetValues_MPIDense"
78 int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
79 {
80   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
81   int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;
82 
83   PetscFunctionBegin;
84   for (i=0; i<m; i++) {
85     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
86     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
87     if (idxm[i] >= rstart && idxm[i] < rend) {
88       row = idxm[i] - rstart;
89       for (j=0; j<n; j++) {
90         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
91         if (idxn[j] >= mat->N) {
92           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
93         }
94         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
95       }
96     } else {
97       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
98     }
99   }
100   PetscFunctionReturn(0);
101 }
102 
103 #undef __FUNCT__
104 #define __FUNCT__ "MatGetArray_MPIDense"
105 int MatGetArray_MPIDense(Mat A,PetscScalar **array)
106 {
107   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
108   int          ierr;
109 
110   PetscFunctionBegin;
111   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
112   PetscFunctionReturn(0);
113 }
114 
115 #undef __FUNCT__
116 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
117 static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
118 {
119   Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
120   Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
121   int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
122   PetscScalar  *av,*bv,*v = lmat->v;
123   Mat          newmat;
124 
125   PetscFunctionBegin;
126   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
127   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
128   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
129   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
130 
131   /* No parallel redistribution currently supported! Should really check each index set
132      to confirm that it is OK.  ... Currently supports only submatrices with the same
133      row partitioning as the original matrix! */
134 
135   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
136   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
137 
138   /* Check submatrix call */
139   if (scall == MAT_REUSE_MATRIX) {
140     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
141     /* Really need to check the row and column sizes! */
142     newmat = *B;
143   } else {
144     /* Create and fill new matrix */
145     ierr = MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);CHKERRQ(ierr);
146   }
147 
148   /* Now extract the data pointers and do the copy, column at a time */
149   newmatd = (Mat_MPIDense*)newmat->data;
150   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
151 
152   for (i=0; i<ncols; i++) {
153     av = v + nlrows*icol[i];
154     for (j=0; j<nrows; j++) {
155       *bv++ = av[irow[j] - rstart];
156     }
157   }
158 
159   /* Assemble the matrices so that the correct flags are set */
160   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
161   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
162 
163   /* Free work space */
164   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
165   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
166   *B = newmat;
167   PetscFunctionReturn(0);
168 }
169 
170 #undef __FUNCT__
171 #define __FUNCT__ "MatRestoreArray_MPIDense"
172 int MatRestoreArray_MPIDense(Mat A,PetscScalar **array)
173 {
174   PetscFunctionBegin;
175   PetscFunctionReturn(0);
176 }
177 
178 #undef __FUNCT__
179 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
180 int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
181 {
182   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
183   MPI_Comm     comm = mat->comm;
184   int          ierr,nstash,reallocs;
185   InsertMode   addv;
186 
187   PetscFunctionBegin;
188   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
189   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
190   if (addv == (ADD_VALUES|INSERT_VALUES)) {
191     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
192   }
193   mat->insertmode = addv; /* in case this processor had no cache */
194 
195   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
196   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
197   PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
198   PetscFunctionReturn(0);
199 }
200 
201 #undef __FUNCT__
202 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
203 int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
204 {
205   Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
206   int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
207   PetscScalar  *val;
208   InsertMode   addv=mat->insertmode;
209 
210   PetscFunctionBegin;
211   /*  wait on receives */
212   while (1) {
213     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
214     if (!flg) break;
215 
216     for (i=0; i<n;) {
217       /* Now identify the consecutive vals belonging to the same row */
218       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
219       if (j < n) ncols = j-i;
220       else       ncols = n-i;
221       /* Now assemble all these values with a single function call */
222       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
223       i = j;
224     }
225   }
226   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
227 
228   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
229   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
230 
231   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
232     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
233   }
234   PetscFunctionReturn(0);
235 }
236 
237 #undef __FUNCT__
238 #define __FUNCT__ "MatZeroEntries_MPIDense"
239 int MatZeroEntries_MPIDense(Mat A)
240 {
241   int          ierr;
242   Mat_MPIDense *l = (Mat_MPIDense*)A->data;
243 
244   PetscFunctionBegin;
245   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
246   PetscFunctionReturn(0);
247 }
248 
249 #undef __FUNCT__
250 #define __FUNCT__ "MatGetBlockSize_MPIDense"
251 int MatGetBlockSize_MPIDense(Mat A,int *bs)
252 {
253   PetscFunctionBegin;
254   *bs = 1;
255   PetscFunctionReturn(0);
256 }
257 
258 /* the code does not do the diagonal entries correctly unless the
259    matrix is square and the column and row ownerships are identical.
260    This is a BUG. The only way to fix it seems to be to access
261    mdn->A and mdn->B directly and not through the MatZeroRows()
262    routine.
263 */
264 #undef __FUNCT__
265 #define __FUNCT__ "MatZeroRows_MPIDense"
266 int MatZeroRows_MPIDense(Mat A,IS is,PetscScalar *diag)
267 {
268   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
269   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
270   int            *nprocs,j,idx,nsends;
271   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
272   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
273   int            *lens,imdex,*lrows,*values;
274   MPI_Comm       comm = A->comm;
275   MPI_Request    *send_waits,*recv_waits;
276   MPI_Status     recv_status,*send_status;
277   IS             istmp;
278   PetscTruth     found;
279 
280   PetscFunctionBegin;
281   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
282   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
283 
284   /*  first count number of contributors to each processor */
285   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
286   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
287   ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
288   for (i=0; i<N; i++) {
289     idx = rows[i];
290     found = PETSC_FALSE;
291     for (j=0; j<size; j++) {
292       if (idx >= owners[j] && idx < owners[j+1]) {
293         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
294       }
295     }
296     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
297   }
298   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
299 
300   /* inform other processors of number of messages and max length*/
301   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
302 
303   /* post receives:   */
304   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
305   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
306   for (i=0; i<nrecvs; i++) {
307     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
308   }
309 
310   /* do sends:
311       1) starts[i] gives the starting index in svalues for stuff going to
312          the ith processor
313   */
314   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
315   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
316   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
317   starts[0]  = 0;
318   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
319   for (i=0; i<N; i++) {
320     svalues[starts[owner[i]]++] = rows[i];
321   }
322   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
323 
324   starts[0] = 0;
325   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
326   count = 0;
327   for (i=0; i<size; i++) {
328     if (nprocs[2*i+1]) {
329       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
330     }
331   }
332   ierr = PetscFree(starts);CHKERRQ(ierr);
333 
334   base = owners[rank];
335 
336   /*  wait on receives */
337   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
338   source = lens + nrecvs;
339   count  = nrecvs; slen = 0;
340   while (count) {
341     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
342     /* unpack receives into our local space */
343     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
344     source[imdex]  = recv_status.MPI_SOURCE;
345     lens[imdex]  = n;
346     slen += n;
347     count--;
348   }
349   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
350 
351   /* move the data into the send scatter */
352   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
353   count = 0;
354   for (i=0; i<nrecvs; i++) {
355     values = rvalues + i*nmax;
356     for (j=0; j<lens[i]; j++) {
357       lrows[count++] = values[j] - base;
358     }
359   }
360   ierr = PetscFree(rvalues);CHKERRQ(ierr);
361   ierr = PetscFree(lens);CHKERRQ(ierr);
362   ierr = PetscFree(owner);CHKERRQ(ierr);
363   ierr = PetscFree(nprocs);CHKERRQ(ierr);
364 
365   /* actually zap the local rows */
366   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
367   PetscLogObjectParent(A,istmp);
368   ierr = PetscFree(lrows);CHKERRQ(ierr);
369   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
370   ierr = ISDestroy(istmp);CHKERRQ(ierr);
371 
372   /* wait on sends */
373   if (nsends) {
374     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
375     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
376     ierr = PetscFree(send_status);CHKERRQ(ierr);
377   }
378   ierr = PetscFree(send_waits);CHKERRQ(ierr);
379   ierr = PetscFree(svalues);CHKERRQ(ierr);
380 
381   PetscFunctionReturn(0);
382 }
383 
384 #undef __FUNCT__
385 #define __FUNCT__ "MatMult_MPIDense"
386 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
387 {
388   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
389   int          ierr;
390 
391   PetscFunctionBegin;
392   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
393   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
394   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
395   PetscFunctionReturn(0);
396 }
397 
398 #undef __FUNCT__
399 #define __FUNCT__ "MatMultAdd_MPIDense"
400 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
401 {
402   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
403   int          ierr;
404 
405   PetscFunctionBegin;
406   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
407   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
408   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
409   PetscFunctionReturn(0);
410 }
411 
412 #undef __FUNCT__
413 #define __FUNCT__ "MatMultTranspose_MPIDense"
414 int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
415 {
416   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
417   int          ierr;
418   PetscScalar  zero = 0.0;
419 
420   PetscFunctionBegin;
421   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
422   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
423   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
424   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
425   PetscFunctionReturn(0);
426 }
427 
428 #undef __FUNCT__
429 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
430 int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
431 {
432   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
433   int          ierr;
434 
435   PetscFunctionBegin;
436   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
437   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
438   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
439   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
440   PetscFunctionReturn(0);
441 }
442 
443 #undef __FUNCT__
444 #define __FUNCT__ "MatGetDiagonal_MPIDense"
445 int MatGetDiagonal_MPIDense(Mat A,Vec v)
446 {
447   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
448   Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
449   int          ierr,len,i,n,m = A->m,radd;
450   PetscScalar  *x,zero = 0.0;
451 
452   PetscFunctionBegin;
453   ierr = VecSet(&zero,v);CHKERRQ(ierr);
454   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
455   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
456   if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
457   len  = PetscMin(a->A->m,a->A->n);
458   radd = a->rstart*m;
459   for (i=0; i<len; i++) {
460     x[i] = aloc->v[radd + i*m + i];
461   }
462   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
463   PetscFunctionReturn(0);
464 }
465 
466 #undef __FUNCT__
467 #define __FUNCT__ "MatDestroy_MPIDense"
468 int MatDestroy_MPIDense(Mat mat)
469 {
470   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
471   int          ierr;
472 
473   PetscFunctionBegin;
474 
475 #if defined(PETSC_USE_LOG)
476   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
477 #endif
478   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
479   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
480   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
481   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
482   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
483   if (mdn->factor) {
484     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
485     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
486     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
487     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
488   }
489   ierr = PetscFree(mdn);CHKERRQ(ierr);
490   PetscFunctionReturn(0);
491 }
492 
493 #undef __FUNCT__
494 #define __FUNCT__ "MatView_MPIDense_Binary"
495 static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
496 {
497   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
498   int          ierr;
499 
500   PetscFunctionBegin;
501   if (mdn->size == 1) {
502     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
503   }
504   else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
505   PetscFunctionReturn(0);
506 }
507 
508 #undef __FUNCT__
509 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
510 static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
511 {
512   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
513   int               ierr,size = mdn->size,rank = mdn->rank;
514   PetscViewerType   vtype;
515   PetscTruth        isascii,isdraw;
516   PetscViewer       sviewer;
517   PetscViewerFormat format;
518 
519   PetscFunctionBegin;
520   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
521   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
522   if (isascii) {
523     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
524     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
525     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
526       MatInfo info;
527       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
528       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
529                    (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
530       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
531       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
532       PetscFunctionReturn(0);
533     } else if (format == PETSC_VIEWER_ASCII_INFO) {
534       PetscFunctionReturn(0);
535     }
536   } else if (isdraw) {
537     PetscDraw       draw;
538     PetscTruth isnull;
539 
540     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
541     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
542     if (isnull) PetscFunctionReturn(0);
543   }
544 
545   if (size == 1) {
546     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
547   } else {
548     /* assemble the entire matrix onto first processor. */
549     Mat          A;
550     int          M = mat->M,N = mat->N,m,row,i,nz,*cols;
551     PetscScalar  *vals;
552 
553     if (!rank) {
554       ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
555     } else {
556       ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
557     }
558     PetscLogObjectParent(mat,A);
559 
560     /* Copy the matrix ... This isn't the most efficient means,
561        but it's quick for now */
562     row = mdn->rstart; m = mdn->A->m;
563     for (i=0; i<m; i++) {
564       ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
565       ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
566       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
567       row++;
568     }
569 
570     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
571     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
572     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
573     if (!rank) {
574       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
575     }
576     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
577     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
578     ierr = MatDestroy(A);CHKERRQ(ierr);
579   }
580   PetscFunctionReturn(0);
581 }
582 
583 #undef __FUNCT__
584 #define __FUNCT__ "MatView_MPIDense"
585 int MatView_MPIDense(Mat mat,PetscViewer viewer)
586 {
587   int        ierr;
588   PetscTruth isascii,isbinary,isdraw,issocket;
589 
590   PetscFunctionBegin;
591 
592   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
593   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
594   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
595   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
596 
597   if (isascii || issocket || isdraw) {
598     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
599   } else if (isbinary) {
600     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
601   } else {
602     SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
603   }
604   PetscFunctionReturn(0);
605 }
606 
607 #undef __FUNCT__
608 #define __FUNCT__ "MatGetInfo_MPIDense"
609 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
610 {
611   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
612   Mat          mdn = mat->A;
613   int          ierr;
614   PetscReal    isend[5],irecv[5];
615 
616   PetscFunctionBegin;
617   info->rows_global    = (double)A->M;
618   info->columns_global = (double)A->N;
619   info->rows_local     = (double)A->m;
620   info->columns_local  = (double)A->N;
621   info->block_size     = 1.0;
622   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
623   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
624   isend[3] = info->memory;  isend[4] = info->mallocs;
625   if (flag == MAT_LOCAL) {
626     info->nz_used      = isend[0];
627     info->nz_allocated = isend[1];
628     info->nz_unneeded  = isend[2];
629     info->memory       = isend[3];
630     info->mallocs      = isend[4];
631   } else if (flag == MAT_GLOBAL_MAX) {
632     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
633     info->nz_used      = irecv[0];
634     info->nz_allocated = irecv[1];
635     info->nz_unneeded  = irecv[2];
636     info->memory       = irecv[3];
637     info->mallocs      = irecv[4];
638   } else if (flag == MAT_GLOBAL_SUM) {
639     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
640     info->nz_used      = irecv[0];
641     info->nz_allocated = irecv[1];
642     info->nz_unneeded  = irecv[2];
643     info->memory       = irecv[3];
644     info->mallocs      = irecv[4];
645   }
646   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
647   info->fill_ratio_needed = 0;
648   info->factor_mallocs    = 0;
649   PetscFunctionReturn(0);
650 }
651 
652 #undef __FUNCT__
653 #define __FUNCT__ "MatSetOption_MPIDense"
654 int MatSetOption_MPIDense(Mat A,MatOption op)
655 {
656   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
657   int          ierr;
658 
659   PetscFunctionBegin;
660   switch (op) {
661   case MAT_NO_NEW_NONZERO_LOCATIONS:
662   case MAT_YES_NEW_NONZERO_LOCATIONS:
663   case MAT_NEW_NONZERO_LOCATION_ERR:
664   case MAT_NEW_NONZERO_ALLOCATION_ERR:
665   case MAT_COLUMNS_SORTED:
666   case MAT_COLUMNS_UNSORTED:
667     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
668     break;
669   case MAT_ROW_ORIENTED:
670     a->roworiented = PETSC_TRUE;
671     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
672     break;
673   case MAT_ROWS_SORTED:
674   case MAT_ROWS_UNSORTED:
675   case MAT_YES_NEW_DIAGONALS:
676   case MAT_USE_HASH_TABLE:
677     PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
678     break;
679   case MAT_COLUMN_ORIENTED:
680     a->roworiented = PETSC_FALSE;
681     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
682     break;
683   case MAT_IGNORE_OFF_PROC_ENTRIES:
684     a->donotstash = PETSC_TRUE;
685     break;
686   case MAT_NO_NEW_DIAGONALS:
687     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
688   default:
689     SETERRQ(PETSC_ERR_SUP,"unknown option");
690   }
691   PetscFunctionReturn(0);
692 }
693 
694 #undef __FUNCT__
695 #define __FUNCT__ "MatGetRow_MPIDense"
696 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v)
697 {
698   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
699   int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;
700 
701   PetscFunctionBegin;
702   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
703   lrow = row - rstart;
704   ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
705   PetscFunctionReturn(0);
706 }
707 
708 #undef __FUNCT__
709 #define __FUNCT__ "MatRestoreRow_MPIDense"
710 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
711 {
712   int ierr;
713 
714   PetscFunctionBegin;
715   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
716   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
717   PetscFunctionReturn(0);
718 }
719 
720 #undef __FUNCT__
721 #define __FUNCT__ "MatDiagonalScale_MPIDense"
722 int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
723 {
724   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
725   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
726   PetscScalar  *l,*r,x,*v;
727   int          ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;
728 
729   PetscFunctionBegin;
730   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
731   if (ll) {
732     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
733     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
734     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
735     for (i=0; i<m; i++) {
736       x = l[i];
737       v = mat->v + i;
738       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
739     }
740     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
741     PetscLogFlops(n*m);
742   }
743   if (rr) {
744     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
745     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
746     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
747     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
748     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
749     for (i=0; i<n; i++) {
750       x = r[i];
751       v = mat->v + i*m;
752       for (j=0; j<m; j++) { (*v++) *= x;}
753     }
754     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
755     PetscLogFlops(n*m);
756   }
757   PetscFunctionReturn(0);
758 }
759 
760 #undef __FUNCT__
761 #define __FUNCT__ "MatNorm_MPIDense"
762 int MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
763 {
764   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
765   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
766   int          ierr,i,j;
767   PetscReal    sum = 0.0;
768   PetscScalar  *v = mat->v;
769 
770   PetscFunctionBegin;
771   if (mdn->size == 1) {
772     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
773   } else {
774     if (type == NORM_FROBENIUS) {
775       for (i=0; i<mdn->A->n*mdn->A->m; i++) {
776 #if defined(PETSC_USE_COMPLEX)
777         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
778 #else
779         sum += (*v)*(*v); v++;
780 #endif
781       }
782       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
783       *nrm = sqrt(*nrm);
784       PetscLogFlops(2*mdn->A->n*mdn->A->m);
785     } else if (type == NORM_1) {
786       PetscReal *tmp,*tmp2;
787       ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
788       tmp2 = tmp + A->N;
789       ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
790       *nrm = 0.0;
791       v = mat->v;
792       for (j=0; j<mdn->A->n; j++) {
793         for (i=0; i<mdn->A->m; i++) {
794           tmp[j] += PetscAbsScalar(*v);  v++;
795         }
796       }
797       ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
798       for (j=0; j<A->N; j++) {
799         if (tmp2[j] > *nrm) *nrm = tmp2[j];
800       }
801       ierr = PetscFree(tmp);CHKERRQ(ierr);
802       PetscLogFlops(A->n*A->m);
803     } else if (type == NORM_INFINITY) { /* max row norm */
804       PetscReal ntemp;
805       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
806       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
807     } else {
808       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
809     }
810   }
811   PetscFunctionReturn(0);
812 }
813 
814 #undef __FUNCT__
815 #define __FUNCT__ "MatTranspose_MPIDense"
816 int MatTranspose_MPIDense(Mat A,Mat *matout)
817 {
818   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
819   Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
820   Mat          B;
821   int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
822   int          j,i,ierr;
823   PetscScalar  *v;
824 
825   PetscFunctionBegin;
826   if (!matout && M != N) {
827     SETERRQ(PETSC_ERR_SUP,"Supports in-place transpose only for square matrices");
828   }
829   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);
830 
831   m = a->A->m; n = a->A->n; v = Aloc->v;
832   ierr = PetscMalloc(m*sizeof(int),&rwork);CHKERRQ(ierr);
833   for (j=0; j<n; j++) {
834     for (i=0; i<m; i++) rwork[i] = rstart + i;
835     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
836     v   += m;
837   }
838   ierr = PetscFree(rwork);CHKERRQ(ierr);
839   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
840   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
841   if (matout) {
842     *matout = B;
843   } else {
844     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
845   }
846   PetscFunctionReturn(0);
847 }
848 
849 #include "petscblaslapack.h"
850 #undef __FUNCT__
851 #define __FUNCT__ "MatScale_MPIDense"
852 int MatScale_MPIDense(PetscScalar *alpha,Mat inA)
853 {
854   Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
855   Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
856   int          one = 1,nz;
857 
858   PetscFunctionBegin;
859   nz = inA->m*inA->N;
860   BLscal_(&nz,alpha,a->v,&one);
861   PetscLogFlops(nz);
862   PetscFunctionReturn(0);
863 }
864 
865 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
866 EXTERN int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);
867 
868 #undef __FUNCT__
869 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
870 int MatSetUpPreallocation_MPIDense(Mat A)
871 {
872   int        ierr;
873 
874   PetscFunctionBegin;
875   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
876   PetscFunctionReturn(0);
877 }
878 
879 /* -------------------------------------------------------------------*/
880 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
881        MatGetRow_MPIDense,
882        MatRestoreRow_MPIDense,
883        MatMult_MPIDense,
884        MatMultAdd_MPIDense,
885        MatMultTranspose_MPIDense,
886        MatMultTransposeAdd_MPIDense,
887        0,
888        0,
889        0,
890        0,
891        0,
892        0,
893        0,
894        MatTranspose_MPIDense,
895        MatGetInfo_MPIDense,0,
896        MatGetDiagonal_MPIDense,
897        MatDiagonalScale_MPIDense,
898        MatNorm_MPIDense,
899        MatAssemblyBegin_MPIDense,
900        MatAssemblyEnd_MPIDense,
901        0,
902        MatSetOption_MPIDense,
903        MatZeroEntries_MPIDense,
904        MatZeroRows_MPIDense,
905        0,
906        0,
907        0,
908        0,
909        MatSetUpPreallocation_MPIDense,
910        0,
911        0,
912        MatGetArray_MPIDense,
913        MatRestoreArray_MPIDense,
914        MatDuplicate_MPIDense,
915        0,
916        0,
917        0,
918        0,
919        0,
920        MatGetSubMatrices_MPIDense,
921        0,
922        MatGetValues_MPIDense,
923        0,
924        0,
925        MatScale_MPIDense,
926        0,
927        0,
928        0,
929        MatGetBlockSize_MPIDense,
930        0,
931        0,
932        0,
933        0,
934        0,
935        0,
936        0,
937        0,
938        0,
939        MatGetSubMatrix_MPIDense,
940        MatDestroy_MPIDense,
941        MatView_MPIDense,
942        MatGetPetscMaps_Petsc};
943 
944 EXTERN_C_BEGIN
945 #undef __FUNCT__
946 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
947 int MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
948 {
949   Mat_MPIDense *a;
950   int          ierr;
951   PetscTruth   flg2;
952 
953   PetscFunctionBegin;
954   mat->preallocated = PETSC_TRUE;
955   /* Note:  For now, when data is specified, this assumes that the user correctly
956      allocates the local dense storage space.  We should add error checking. */
957 
958   a    = (Mat_MPIDense*)mat->data;
959   ierr = MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);CHKERRQ(ierr);
960   PetscLogObjectParent(mat,a->A);
961   PetscFunctionReturn(0);
962 }
963 EXTERN_C_END
964 
965 EXTERN_C_BEGIN
966 #undef __FUNCT__
967 #define __FUNCT__ "MatCreate_MPIDense"
968 int MatCreate_MPIDense(Mat mat)
969 {
970   Mat_MPIDense *a;
971   int          ierr,i;
972 
973   PetscFunctionBegin;
974   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
975   mat->data         = (void*)a;
976   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
977   mat->factor       = 0;
978   mat->mapping      = 0;
979 
980   a->factor       = 0;
981   mat->insertmode = NOT_SET_VALUES;
982   ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
983   ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);
984 
985   ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
986   ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
987   a->nvec = mat->n;
988 
989   /* the information in the maps duplicates the information computed below, eventually
990      we should remove the duplicate information that is not contained in the maps */
991   ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
992   ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);
993 
994   /* build local table of row and column ownerships */
995   ierr       = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
996   a->cowners = a->rowners + a->size + 1;
997   PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
998   ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
999   a->rowners[0] = 0;
1000   for (i=2; i<=a->size; i++) {
1001     a->rowners[i] += a->rowners[i-1];
1002   }
1003   a->rstart = a->rowners[a->rank];
1004   a->rend   = a->rowners[a->rank+1];
1005   ierr      = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
1006   a->cowners[0] = 0;
1007   for (i=2; i<=a->size; i++) {
1008     a->cowners[i] += a->cowners[i-1];
1009   }
1010 
1011   /* build cache for off-processor entries formed during assembly */
1012   a->donotstash = PETSC_FALSE;
1013   ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);
1014 
1015   /* stuff used for matrix vector multiply */
1016   a->lvec        = 0;
1017   a->Mvctx       = 0;
1018   a->roworiented = PETSC_TRUE;
1019 
1020   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1021                                      "MatGetDiagonalBlock_MPIDense",
1022                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1023   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1024                                      "MatMPIDenseSetPreallocation_MPIDense",
1025                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1026   PetscFunctionReturn(0);
1027 }
1028 EXTERN_C_END
1029 
1030 #undef __FUNCT__
1031 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1032 /*@C
1033    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1034 
1035    Not collective
1036 
1037    Input Parameters:
1038 +  A - the matrix
1039 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1040    to control all matrix memory allocation.
1041 
1042    Notes:
1043    The dense format is fully compatible with standard Fortran 77
1044    storage by columns.
1045 
1046    The data input variable is intended primarily for Fortran programmers
1047    who wish to allocate their own matrix memory space.  Most users should
1048    set data=PETSC_NULL.
1049 
1050    Level: intermediate
1051 
1052 .keywords: matrix,dense, parallel
1053 
1054 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1055 @*/
1056 int MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1057 {
1058   int ierr,(*f)(Mat,PetscScalar *);
1059 
1060   PetscFunctionBegin;
1061   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1062   if (f) {
1063     ierr = (*f)(mat,data);CHKERRQ(ierr);
1064   }
1065   PetscFunctionReturn(0);
1066 }
1067 
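/*
   Illustrative sketch (not part of the library source): one way a caller might hand its
   own column-major storage to MatMPIDenseSetPreallocation().  The helper name
   ExamplePreallocateMPIDense and the zero-initialized array are hypothetical; the array
   must hold (local rows) x (global columns) entries and remains owned (and eventually
   freed) by the caller.  Pass PETSC_NULL instead to let PETSc allocate the storage.
*/
#if 0
#include "petscmat.h"

int ExamplePreallocateMPIDense(MPI_Comm comm,int m,int M,int N)
{
  Mat         A;
  PetscScalar *storage;
  int         ierr;

  /* user-provided array: m local rows by N global columns, stored by columns */
  ierr = PetscMalloc(m*N*sizeof(PetscScalar),&storage);CHKERRQ(ierr);
  ierr = PetscMemzero(storage,m*N*sizeof(PetscScalar));CHKERRQ(ierr);

  ierr = MatCreate(comm,m,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
  ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(A,storage);CHKERRQ(ierr);

  /* ... set values and use the matrix ... */

  ierr = MatDestroy(A);CHKERRQ(ierr);
  ierr = PetscFree(storage);CHKERRQ(ierr);
  return 0;
}
#endif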
1068 #undef __FUNCT__
1069 #define __FUNCT__ "MatCreateMPIDense"
1070 /*@C
1071    MatCreateMPIDense - Creates a parallel matrix in dense format.
1072 
1073    Collective on MPI_Comm
1074 
1075    Input Parameters:
1076 +  comm - MPI communicator
1077 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1078 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1079 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1080 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1081 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1082    to control all matrix memory allocation.
1083 
1084    Output Parameter:
1085 .  A - the matrix
1086 
1087    Notes:
1088    The dense format is fully compatible with standard Fortran 77
1089    storage by columns.
1090 
1091    The data input variable is intended primarily for Fortran programmers
1092    who wish to allocate their own matrix memory space.  Most users should
1093    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1094 
1095    The user MUST specify either the local or global matrix dimensions
1096    (possibly both).
1097 
1098    Level: intermediate
1099 
1100 .keywords: matrix,dense, parallel
1101 
1102 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1103 @*/
1104 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A)
1105 {
1106   int ierr,size;
1107 
1108   PetscFunctionBegin;
1109   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1110   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1111   if (size > 1) {
1112     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1113     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1114   } else {
1115     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1116     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1117   }
1118   PetscFunctionReturn(0);
1119 }
1120 
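/*
   Illustrative sketch (not part of the library source): typical use of MatCreateMPIDense()
   followed by MatSetValues() and assembly.  The helper name ExampleCreateMPIDense and the
   value formula are hypothetical; each process fills only the rows it owns, as reported by
   MatGetOwnershipRange().
*/
#if 0
#include "petscmat.h"

int ExampleCreateMPIDense(MPI_Comm comm,int M,int N)
{
  Mat         A;
  int         ierr,i,j,rstart,rend;
  PetscScalar v;

  /* let PETSc choose the local row and column sizes */
  ierr = MatCreateMPIDense(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* each process inserts the entries of its own rows */
  for (i=rstart; i<rend; i++) {
    for (j=0; j<N; j++) {
      v    = (PetscScalar)(i + j);
      ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* ... use the matrix ... */

  ierr = MatDestroy(A);CHKERRQ(ierr);
  return 0;
}
#endif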
1121 #undef __FUNCT__
1122 #define __FUNCT__ "MatDuplicate_MPIDense"
1123 static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1124 {
1125   Mat          mat;
1126   Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1127   int          ierr;
1128 
1129   PetscFunctionBegin;
1130   *newmat       = 0;
1131   ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
1132   ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr);
1133   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1134   mat->data         = (void*)a;
1135   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1136   mat->factor       = A->factor;
1137   mat->assembled    = PETSC_TRUE;
1138   mat->preallocated = PETSC_TRUE;
1139 
1140   a->rstart       = oldmat->rstart;
1141   a->rend         = oldmat->rend;
1142   a->size         = oldmat->size;
1143   a->rank         = oldmat->rank;
1144   mat->insertmode = NOT_SET_VALUES;
1145   a->nvec         = oldmat->nvec;
1146   a->donotstash   = oldmat->donotstash;
1147   ierr            = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);CHKERRQ(ierr);
1148   PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1149   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
1150   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1151 
1152   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1153   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1154   PetscLogObjectParent(mat,a->A);
1155   *newmat = mat;
1156   PetscFunctionReturn(0);
1157 }
1158 
1159 #include "petscsys.h"
1160 
1161 #undef __FUNCT__
1162 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1163 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
1164 {
1165   int          *rowners,i,size,rank,m,ierr,nz,j;
1166   PetscScalar  *array,*vals,*vals_ptr;
1167   MPI_Status   status;
1168 
1169   PetscFunctionBegin;
1170   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1171   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1172 
1173   /* determine ownership of all rows */
1174   m          = M/size + ((M % size) > rank);
1175   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1176   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1177   rowners[0] = 0;
1178   for (i=2; i<=size; i++) {
1179     rowners[i] += rowners[i-1];
1180   }
1181 
1182   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1183   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1184 
1185   if (!rank) {
1186     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1187 
1188     /* read in my part of the matrix numerical values  */
1189     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1190 
1191     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1192     vals_ptr = vals;
1193     for (i=0; i<m; i++) {
1194       for (j=0; j<N; j++) {
1195         array[i + j*m] = *vals_ptr++;
1196       }
1197     }
1198 
1199     /* read in other processors and ship out */
1200     for (i=1; i<size; i++) {
1201       nz   = (rowners[i+1] - rowners[i])*N;
1202       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1203       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1204     }
1205   } else {
1206     /* receive numeric values */
1207     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1208 
1209     /* receive message of values*/
1210     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1211 
1212     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1213     vals_ptr = vals;
1214     for (i=0; i<m; i++) {
1215       for (j=0; j<N; j++) {
1216         array[i + j*m] = *vals_ptr++;
1217       }
1218     }
1219   }
1220   ierr = PetscFree(rowners);CHKERRQ(ierr);
1221   ierr = PetscFree(vals);CHKERRQ(ierr);
1222   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1223   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1224   PetscFunctionReturn(0);
1225 }
1226 
1227 EXTERN_C_BEGIN
1228 #undef __FUNCT__
1229 #define __FUNCT__ "MatLoad_MPIDense"
1230 int MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
1231 {
1232   Mat          A;
1233   PetscScalar  *vals,*svals;
1234   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1235   MPI_Status   status;
1236   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1237   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1238   int          tag = ((PetscObject)viewer)->tag;
1239   int          i,nz,ierr,j,rstart,rend,fd;
1240 
1241   PetscFunctionBegin;
1242   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1243   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1244   if (!rank) {
1245     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1246     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1247     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1248   }
1249 
1250   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1251   M = header[1]; N = header[2]; nz = header[3];
1252 
1253   /*
1254        Handle case where matrix is stored on disk as a dense matrix
1255   */
1256   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1257     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1258     PetscFunctionReturn(0);
1259   }
1260 
1261   /* determine ownership of all rows */
1262   m          = M/size + ((M % size) > rank);
1263   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1264   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1265   rowners[0] = 0;
1266   for (i=2; i<=size; i++) {
1267     rowners[i] += rowners[i-1];
1268   }
1269   rstart = rowners[rank];
1270   rend   = rowners[rank+1];
1271 
1272   /* distribute row lengths to all processors */
1273   ierr    = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);CHKERRQ(ierr);
1274   offlens = ourlens + (rend-rstart);
1275   if (!rank) {
1276     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1277     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1278     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1279     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1280     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1281     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1282   } else {
1283     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1284   }
1285 
1286   if (!rank) {
1287     /* calculate the number of nonzeros on each processor */
1288     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1289     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1290     for (i=0; i<size; i++) {
1291       for (j=rowners[i]; j< rowners[i+1]; j++) {
1292         procsnz[i] += rowlengths[j];
1293       }
1294     }
1295     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1296 
1297     /* determine max buffer needed and allocate it */
1298     maxnz = 0;
1299     for (i=0; i<size; i++) {
1300       maxnz = PetscMax(maxnz,procsnz[i]);
1301     }
1302     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1303 
1304     /* read in my part of the matrix column indices  */
1305     nz = procsnz[0];
1306     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1307     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1308 
1309     /* read in everyone else's part and ship it off */
1310     for (i=1; i<size; i++) {
1311       nz   = procsnz[i];
1312       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1313       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1314     }
1315     ierr = PetscFree(cols);CHKERRQ(ierr);
1316   } else {
1317     /* determine buffer space needed for message */
1318     nz = 0;
1319     for (i=0; i<m; i++) {
1320       nz += ourlens[i];
1321     }
1322     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1323 
1324     /* receive message of column indices*/
1325     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1326     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1327     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1328   }
1329 
1330   /* loop over local rows, determining number of off-diagonal entries */
1331   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1332   jj = 0;
1333   for (i=0; i<m; i++) {
1334     for (j=0; j<ourlens[i]; j++) {
1335       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1336       jj++;
1337     }
1338   }
1339 
1340   /* create our matrix */
1341   for (i=0; i<m; i++) {
1342     ourlens[i] -= offlens[i];
1343   }
1344   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1345   A = *newmat;
1346   for (i=0; i<m; i++) {
1347     ourlens[i] += offlens[i];
1348   }
1349 
1350   if (!rank) {
1351     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1352 
1353     /* read in my part of the matrix numerical values  */
1354     nz = procsnz[0];
1355     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1356 
1357     /* insert into matrix */
1358     jj      = rstart;
1359     smycols = mycols;
1360     svals   = vals;
1361     for (i=0; i<m; i++) {
1362       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1363       smycols += ourlens[i];
1364       svals   += ourlens[i];
1365       jj++;
1366     }
1367 
1368     /* read in other processors and ship out */
1369     for (i=1; i<size; i++) {
1370       nz   = procsnz[i];
1371       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1372       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1373     }
1374     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1375   } else {
1376     /* receive numeric values */
1377     ierr = PetscMalloc(nz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1378 
1379     /* receive message of values*/
1380     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1381     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1382     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1383 
1384     /* insert into matrix */
1385     jj      = rstart;
1386     smycols = mycols;
1387     svals   = vals;
1388     for (i=0; i<m; i++) {
1389       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1390       smycols += ourlens[i];
1391       svals   += ourlens[i];
1392       jj++;
1393     }
1394   }
1395   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1396   ierr = PetscFree(vals);CHKERRQ(ierr);
1397   ierr = PetscFree(mycols);CHKERRQ(ierr);
1398   ierr = PetscFree(rowners);CHKERRQ(ierr);
1399 
1400   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1401   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1402   PetscFunctionReturn(0);
1403 }
1404 EXTERN_C_END
1405 