xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision fafad7475eccdf22e42c12aa21085e705729f888)
1 /*$Id: mpidense.c,v 1.159 2001/08/10 03:30:41 bsmith Exp $*/
2 
3 /*
4    Basic functions for basic parallel dense matrices.
5 */
6 
7 #include "src/mat/impls/dense/mpi/mpidense.h"
8 #include "src/vec/vecimpl.h"
9 
10 EXTERN_C_BEGIN
11 #undef __FUNCT__
12 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
/*
   Returns the diagonal block (rows rstart..rstart+m-1 and the matching
   columns) of a square parallel dense matrix as a sequential dense matrix
   that aliases the local storage.  Fails for non-square matrices.
*/
int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  int          m = A->m,rstart = mdn->rstart,ierr;
  PetscScalar  *array;
  MPI_Comm     comm;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  /* the local block is stored column-major with leading dimension m, so
     column rstart begins at offset m*rstart and the m x m diagonal block
     is contiguous from there */
  ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* NOTE(review): *B is built on the shared array rather than a copy, yet
     *iscopy is reported PETSC_TRUE -- confirm the intended ownership contract */
  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
36 EXTERN_C_END
37 
38 #undef __FUNCT__
39 #define __FUNCT__ "MatSetValues_MPIDense"
/*
   Inserts or adds a logically dense sub-block of values.  Locally owned rows
   go straight into the local sequential block; off-process rows are stashed
   for communication at assembly time (unless stashing has been disabled via
   MAT_IGNORE_OFF_PROC_ENTRIES).  Negative row/column indices are skipped.
*/
int MatSetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
  PetscTruth   roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;   /* convert to local row number */
      if (roworiented) {
        /* row-oriented input: row i of v starts at v + i*n */
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* column-oriented input: element (i,j) lives at v[i + j*m] */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* off-process row: stash it for delivery during assembly */
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
73 
74 #undef __FUNCT__
75 #define __FUNCT__ "MatGetValues_MPIDense"
/*
   Retrieves values from a parallel dense matrix into v, filled row-major
   (v[i*n+j] = entry (idxm[i],idxn[j])).  Only locally owned rows may be
   requested; an off-process row raises an error.
*/
int MatGetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;   /* convert to local row number */
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatGetArray_MPIDense"
103 int MatGetArray_MPIDense(Mat A,PetscScalar *array[])
104 {
105   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
106   int          ierr;
107 
108   PetscFunctionBegin;
109   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
110   PetscFunctionReturn(0);
111 }
112 
113 #undef __FUNCT__
114 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
/*
   Extracts A[isrow,iscol] into a new (or reused) parallel dense matrix.
   No parallel redistribution is performed: the result keeps the original
   row partitioning, and each irow[] entry is converted to local numbering
   with rstart -- so the row index set must select rows owned here.
*/
static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
  int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  PetscScalar  *av,*bv,*v = lmat->v;
  Mat          newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index set
     to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  /* column icol[i] of the column-major local array (leading dimension
     nlrows) starts at v + nlrows*icol[i]; irow[] entries are assumed to lie
     in [rstart,rend) -- TODO confirm, no check is made */
  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}
167 
168 #undef __FUNCT__
169 #define __FUNCT__ "MatRestoreArray_MPIDense"
/*
   Companion to MatGetArray_MPIDense.  Intentionally a no-op: the pointer
   handed out presumably aliases the local block's own storage, so there is
   nothing to copy back or release here.
*/
int MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
175 
176 #undef __FUNCT__
177 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
/*
   Begins assembly: verifies that all processes use a compatible InsertMode
   and starts shipping stashed off-process values to their owners.  The
   communication is completed in MatAssemblyEnd_MPIDense.
*/
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  /* the bitwise OR detects a mix: the result then has both bits set */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}
198 
199 #undef __FUNCT__
200 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
/*
   Completes assembly: drains the stash messages started in
   MatAssemblyBegin_MPIDense (re-inserting their values through
   MatSetValues_MPIDense), assembles the local block, and on the first
   final assembly builds the scatter used by the multiply routines.
*/
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  PetscScalar  *val;
  InsertMode   addv=mat->insertmode;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;   /* no more incoming messages */

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* build the vector scatter needed by MatMult and friends exactly once */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
234 
235 #undef __FUNCT__
236 #define __FUNCT__ "MatZeroEntries_MPIDense"
237 int MatZeroEntries_MPIDense(Mat A)
238 {
239   int          ierr;
240   Mat_MPIDense *l = (Mat_MPIDense*)A->data;
241 
242   PetscFunctionBegin;
243   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
244   PetscFunctionReturn(0);
245 }
246 
247 #undef __FUNCT__
248 #define __FUNCT__ "MatGetBlockSize_MPIDense"
249 int MatGetBlockSize_MPIDense(Mat A,int *bs)
250 {
251   PetscFunctionBegin;
252   *bs = 1;
253   PetscFunctionReturn(0);
254 }
255 
256 /* the code does not do the diagonal entries correctly unless the
257    matrix is square and the column and row owerships are identical.
258    This is a BUG. The only way to fix it seems to be to access
259    mdn->A and mdn->B directly and not through the MatZeroRows()
260    routine.
261 */
262 #undef __FUNCT__
263 #define __FUNCT__ "MatZeroRows_MPIDense"
264 int MatZeroRows_MPIDense(Mat A,IS is,const PetscScalar *diag)
265 {
266   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
267   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
268   int            *nprocs,j,idx,nsends;
269   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
270   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
271   int            *lens,imdex,*lrows,*values;
272   MPI_Comm       comm = A->comm;
273   MPI_Request    *send_waits,*recv_waits;
274   MPI_Status     recv_status,*send_status;
275   IS             istmp;
276   PetscTruth     found;
277 
278   PetscFunctionBegin;
279   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
280   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
281 
282   /*  first count number of contributors to each processor */
283   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
284   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
285   ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
286   for (i=0; i<N; i++) {
287     idx = rows[i];
288     found = PETSC_FALSE;
289     for (j=0; j<size; j++) {
290       if (idx >= owners[j] && idx < owners[j+1]) {
291         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
292       }
293     }
294     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
295   }
296   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
297 
298   /* inform other processors of number of messages and max length*/
299   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
300 
301   /* post receives:   */
302   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
303   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
304   for (i=0; i<nrecvs; i++) {
305     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
306   }
307 
308   /* do sends:
309       1) starts[i] gives the starting index in svalues for stuff going to
310          the ith processor
311   */
312   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
313   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
314   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
315   starts[0]  = 0;
316   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
317   for (i=0; i<N; i++) {
318     svalues[starts[owner[i]]++] = rows[i];
319   }
320   ISRestoreIndices(is,&rows);
321 
322   starts[0] = 0;
323   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
324   count = 0;
325   for (i=0; i<size; i++) {
326     if (nprocs[2*i+1]) {
327       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
328     }
329   }
330   ierr = PetscFree(starts);CHKERRQ(ierr);
331 
332   base = owners[rank];
333 
334   /*  wait on receives */
335   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
336   source = lens + nrecvs;
337   count  = nrecvs; slen = 0;
338   while (count) {
339     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
340     /* unpack receives into our local space */
341     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
342     source[imdex]  = recv_status.MPI_SOURCE;
343     lens[imdex]  = n;
344     slen += n;
345     count--;
346   }
347   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
348 
349   /* move the data into the send scatter */
350   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
351   count = 0;
352   for (i=0; i<nrecvs; i++) {
353     values = rvalues + i*nmax;
354     for (j=0; j<lens[i]; j++) {
355       lrows[count++] = values[j] - base;
356     }
357   }
358   ierr = PetscFree(rvalues);CHKERRQ(ierr);
359   ierr = PetscFree(lens);CHKERRQ(ierr);
360   ierr = PetscFree(owner);CHKERRQ(ierr);
361   ierr = PetscFree(nprocs);CHKERRQ(ierr);
362 
363   /* actually zap the local rows */
364   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
365   PetscLogObjectParent(A,istmp);
366   ierr = PetscFree(lrows);CHKERRQ(ierr);
367   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
368   ierr = ISDestroy(istmp);CHKERRQ(ierr);
369 
370   /* wait on sends */
371   if (nsends) {
372     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
373     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
374     ierr = PetscFree(send_status);CHKERRQ(ierr);
375   }
376   ierr = PetscFree(send_waits);CHKERRQ(ierr);
377   ierr = PetscFree(svalues);CHKERRQ(ierr);
378 
379   PetscFunctionReturn(0);
380 }
381 
382 #undef __FUNCT__
383 #define __FUNCT__ "MatMult_MPIDense"
384 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
385 {
386   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
387   int          ierr;
388 
389   PetscFunctionBegin;
390   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
391   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
392   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
393   PetscFunctionReturn(0);
394 }
395 
396 #undef __FUNCT__
397 #define __FUNCT__ "MatMultAdd_MPIDense"
398 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
399 {
400   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
401   int          ierr;
402 
403   PetscFunctionBegin;
404   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
405   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
406   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
407   PetscFunctionReturn(0);
408 }
409 
410 #undef __FUNCT__
411 #define __FUNCT__ "MatMultTranspose_MPIDense"
412 int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
413 {
414   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
415   int          ierr;
416   PetscScalar  zero = 0.0;
417 
418   PetscFunctionBegin;
419   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
420   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
421   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
422   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
423   PetscFunctionReturn(0);
424 }
425 
426 #undef __FUNCT__
427 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
428 int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
429 {
430   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
431   int          ierr;
432 
433   PetscFunctionBegin;
434   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
435   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
436   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
437   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
438   PetscFunctionReturn(0);
439 }
440 
441 #undef __FUNCT__
442 #define __FUNCT__ "MatGetDiagonal_MPIDense"
/*
   Extracts the diagonal of A into v.  v is first zeroed, so entries beyond
   the local square part remain zero; v's global size must equal A->M and
   its local layout is assumed to match the row layout -- TODO confirm.
*/
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
  int          ierr,len,i,n,m = A->m,radd;
  PetscScalar  *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->m,a->A->n);
  /* diagonal entry (rstart+i, rstart+i) of the column-major local array
     (leading dimension m) lives at offset rstart*m + i*m + i */
  radd = a->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
463 
464 #undef __FUNCT__
465 #define __FUNCT__ "MatDestroy_MPIDense"
466 int MatDestroy_MPIDense(Mat mat)
467 {
468   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
469   int          ierr;
470 
471   PetscFunctionBegin;
472 
473 #if defined(PETSC_USE_LOG)
474   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
475 #endif
476   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
477   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
478   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
479   if (mdn->lvec)   VecDestroy(mdn->lvec);
480   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
481   if (mdn->factor) {
482     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
483     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
484     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
485     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
486   }
487   ierr = PetscFree(mdn);CHKERRQ(ierr);
488   PetscFunctionReturn(0);
489 }
490 
491 #undef __FUNCT__
492 #define __FUNCT__ "MatView_MPIDense_Binary"
493 static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
494 {
495   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
496   int          ierr;
497 
498   PetscFunctionBegin;
499   if (mdn->size == 1) {
500     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
501   }
502   else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
503   PetscFunctionReturn(0);
504 }
505 
506 #undef __FUNCT__
507 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
/*
   Viewer back-end shared by ASCII, draw and socket viewers.  INFO-style
   ASCII formats print per-process statistics and return early; otherwise,
   on more than one process the whole matrix is gathered onto process 0
   and viewed through the singleton viewer.
*/
static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  int               ierr,size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscTruth        isascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* statistics only -- the matrix entries themselves are not printed */
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
                   (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw       draw;
    PetscTruth isnull;

    /* a null draw context means "draw nothing" */
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    int          M = mat->M,N = mat->N,m,row,i,nz,*cols;
    PetscScalar  *vals;

    /* process 0 owns all M rows of the gather target; everyone else none */
    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = mdn->A->m;
    for (i=0; i<m; i++) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
580 
581 #undef __FUNCT__
582 #define __FUNCT__ "MatView_MPIDense"
583 int MatView_MPIDense(Mat mat,PetscViewer viewer)
584 {
585   int        ierr;
586   PetscTruth isascii,isbinary,isdraw,issocket;
587 
588   PetscFunctionBegin;
589 
590   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
591   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
592   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
593   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
594 
595   if (isascii || issocket || isdraw) {
596     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
597   } else if (isbinary) {
598     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
599   } else {
600     SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
601   }
602   PetscFunctionReturn(0);
603 }
604 
605 #undef __FUNCT__
606 #define __FUNCT__ "MatGetInfo_MPIDense"
607 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
608 {
609   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
610   Mat          mdn = mat->A;
611   int          ierr;
612   PetscReal    isend[5],irecv[5];
613 
614   PetscFunctionBegin;
615   info->rows_global    = (double)A->M;
616   info->columns_global = (double)A->N;
617   info->rows_local     = (double)A->m;
618   info->columns_local  = (double)A->N;
619   info->block_size     = 1.0;
620   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
621   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
622   isend[3] = info->memory;  isend[4] = info->mallocs;
623   if (flag == MAT_LOCAL) {
624     info->nz_used      = isend[0];
625     info->nz_allocated = isend[1];
626     info->nz_unneeded  = isend[2];
627     info->memory       = isend[3];
628     info->mallocs      = isend[4];
629   } else if (flag == MAT_GLOBAL_MAX) {
630     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
631     info->nz_used      = irecv[0];
632     info->nz_allocated = irecv[1];
633     info->nz_unneeded  = irecv[2];
634     info->memory       = irecv[3];
635     info->mallocs      = irecv[4];
636   } else if (flag == MAT_GLOBAL_SUM) {
637     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
638     info->nz_used      = irecv[0];
639     info->nz_allocated = irecv[1];
640     info->nz_unneeded  = irecv[2];
641     info->memory       = irecv[3];
642     info->mallocs      = irecv[4];
643   }
644   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
645   info->fill_ratio_needed = 0;
646   info->factor_mallocs    = 0;
647   PetscFunctionReturn(0);
648 }
649 
650 #undef __FUNCT__
651 #define __FUNCT__ "MatSetOption_MPIDense"
/*
   Sets a matrix option.  Storage-related options are forwarded to the
   local sequential block; the orientation options flip the flag consulted
   by MatSetValues_MPIDense; options meaningless for dense storage are
   logged and ignored.
*/
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  switch (op) {
  /* forwarded verbatim to the sequential block */
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_COLUMNS_SORTED:
  case MAT_COLUMNS_UNSORTED:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  /* irrelevant for dense storage */
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
    PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* makes MatSetValues_MPIDense drop off-process rows instead of stashing */
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}
691 
692 #undef __FUNCT__
693 #define __FUNCT__ "MatGetRow_MPIDense"
694 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v)
695 {
696   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
697   int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;
698 
699   PetscFunctionBegin;
700   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows")
701   lrow = row - rstart;
702   ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
703   PetscFunctionReturn(0);
704 }
705 
706 #undef __FUNCT__
707 #define __FUNCT__ "MatRestoreRow_MPIDense"
708 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
709 {
710   int ierr;
711 
712   PetscFunctionBegin;
713   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
714   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
715   PetscFunctionReturn(0);
716 }
717 
718 #undef __FUNCT__
719 #define __FUNCT__ "MatDiagonalScale_MPIDense"
/*
   Computes A = diag(ll) * A * diag(rr) in place on the local column-major
   m x n block.  Either vector may be PETSC_NULL to skip that side.
*/
int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar  *l,*r,x,*v;
  int          ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    /* scale row i: walk across the row with stride m (column-major storage) */
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    PetscLogFlops(n*m);
  }
  if (rr) {
    /* NOTE(review): this checks the GLOBAL size of rr against s3, while the
       ll branch above checks the LOCAL size -- confirm which is intended */
    ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
    /* bring the needed entries of rr into the local work vector */
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    /* scale column i: each column is a contiguous run of m entries */
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    PetscLogFlops(n*m);
  }
  PetscFunctionReturn(0);
}
757 
758 #undef __FUNCT__
759 #define __FUNCT__ "MatNorm_MPIDense"
/*
   Computes a matrix norm.  Uniprocessor matrices defer to the sequential
   code.  Otherwise: Frobenius sums squared entries locally and reduces;
   the 1-norm reduces per-column absolute sums and takes the maximum;
   infinity (max row sum) reduces the local row norms.  The 2-norm is not
   supported.
*/
int MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  int          ierr,i,j;
  PetscReal    sum = 0.0;
  PetscScalar  *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of squares over the whole local array, then a global sum */
      for (i=0; i<mdn->A->n*mdn->A->m; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      PetscLogFlops(2*mdn->A->n*mdn->A->m);
    } else if (type == NORM_1) {
      /* tmp accumulates local column sums, tmp2 receives the global sums */
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->N;
      ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v = mat->v;
      for (j=0; j<mdn->A->n; j++) {
        for (i=0; i<mdn->A->m; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      for (j=0; j<A->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      PetscLogFlops(A->n*A->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
811 
812 #undef __FUNCT__
813 #define __FUNCT__ "MatTranspose_MPIDense"
814 int MatTranspose_MPIDense(Mat A,Mat *matout)
815 {
816   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
817   Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
818   Mat          B;
819   int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
820   int          j,i,ierr;
821   PetscScalar  *v;
822 
823   PetscFunctionBegin;
824   if (!matout && M != N) {
825     SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
826   }
827   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);
828 
829   m = a->A->m; n = a->A->n; v = Aloc->v;
830   ierr = PetscMalloc(n*sizeof(int),&rwork);CHKERRQ(ierr);
831   for (j=0; j<n; j++) {
832     for (i=0; i<m; i++) rwork[i] = rstart + i;
833     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
834     v   += m;
835   }
836   ierr = PetscFree(rwork);CHKERRQ(ierr);
837   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
838   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
839   if (matout) {
840     *matout = B;
841   } else {
842     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
843   }
844   PetscFunctionReturn(0);
845 }
846 
847 #include "petscblaslapack.h"
848 #undef __FUNCT__
849 #define __FUNCT__ "MatScale_MPIDense"
850 int MatScale_MPIDense(const PetscScalar *alpha,Mat inA)
851 {
852   Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
853   Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
854   int          one = 1,nz;
855 
856   PetscFunctionBegin;
857   nz = inA->m*inA->N;
858   BLscal_(&nz,(PetscScalar*)alpha,a->v,&one);
859   PetscLogFlops(nz);
860   PetscFunctionReturn(0);
861 }
862 
863 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
864 
865 #undef __FUNCT__
866 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
867 int MatSetUpPreallocation_MPIDense(Mat A)
868 {
869   int        ierr;
870 
871   PetscFunctionBegin;
872   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
873   PetscFunctionReturn(0);
874 }
875 
876 /* -------------------------------------------------------------------*/
/*
   Function dispatch table for the MATMPIDENSE matrix type.  The slot
   order is fixed by struct _MatOps; a 0 entry means the operation is not
   implemented for this type.  The numeric comments mark slot indices.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
/* 4*/ MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
/*15*/ MatGetInfo_MPIDense,
       0,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
/*20*/ MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
/*25*/ MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
/*35*/ MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
/*45*/ 0,
       MatScale_MPIDense,
       0,
       0,
       0,
/*50*/ MatGetBlockSize_MPIDense,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       0,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       MatGetPetscMaps_Petsc,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
       0,
/*85*/ MatLoad_MPIDense};
963 
964 EXTERN_C_BEGIN
965 #undef __FUNCT__
966 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
967 int MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
968 {
969   Mat_MPIDense *a;
970   int          ierr;
971 
972   PetscFunctionBegin;
973   mat->preallocated = PETSC_TRUE;
974   /* Note:  For now, when data is specified above, this assumes the user correctly
975    allocates the local dense storage space.  We should add error checking. */
976 
977   a    = (Mat_MPIDense*)mat->data;
978   ierr = MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);CHKERRQ(ierr);
979   PetscLogObjectParent(mat,a->A);
980   PetscFunctionReturn(0);
981 }
982 EXTERN_C_END
983 
984 /*MC
985    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
986 
987    Options Database Keys:
988 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
989 
990   Level: beginner
991 
.seealso: MatCreateMPIDense()
993 M*/
994 
995 EXTERN_C_BEGIN
996 #undef __FUNCT__
997 #define __FUNCT__ "MatCreate_MPIDense"
/*
   Constructor for the MATMPIDENSE type: installs the operation table,
   determines the parallel row/column layout, and builds the ownership
   tables.  The local dense block (a->A) is NOT created here; it is
   allocated later by MatMPIDenseSetPreallocation_MPIDense().
*/
int MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;
  int          ierr,i;

  PetscFunctionBegin;
  ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data         = (void*)a;
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = 0;
  mat->mapping      = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);

  /* resolve any PETSC_DECIDE local/global dimensions */
  ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
  a->nvec = mat->n;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships; rowners and cowners
     share one allocation (cowners starts size+1 ints in) */
  ierr       = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
  a->cowners = a->rowners + a->size + 1;
  PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  /* gather every process's local row count, then convert to a prefix sum
     so rowners[r] is the global index of process r's first row */
  ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  /* same prefix-sum construction for the column ownership table */
  ierr      = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->cowners[i] += a->cowners[i-1];
  }

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  /* register the type-specific hooks looked up by the generic
     MatGetDiagonalBlock()/MatMPIDenseSetPreallocation() front ends */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIDense",
                                     MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                     "MatMPIDenseSetPreallocation_MPIDense",
                                     MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1058 EXTERN_C_END
1059 
1060 /*MC
1061    MATDENSE = "dense" - A matrix type to be used for dense matrices.
1062 
1063    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1064    and MATMPIDENSE otherwise.
1065 
1066    Options Database Keys:
1067 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1068 
1069   Level: beginner
1070 
.seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
1072 M*/
1073 
1074 EXTERN_C_BEGIN
1075 #undef __FUNCT__
1076 #define __FUNCT__ "MatCreate_Dense"
1077 int MatCreate_Dense(Mat A) {
1078   int ierr,size;
1079 
1080   PetscFunctionBegin;
1081   ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr);
1082   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1083   if (size == 1) {
1084     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1085   } else {
1086     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1087   }
1088   PetscFunctionReturn(0);
1089 }
1090 EXTERN_C_END
1091 
1092 #undef __FUNCT__
1093 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1094 /*@C
1095    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1096 
1097    Not collective
1098 
1099    Input Parameters:
1100 .  A - the matrix
1101 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1102    to control all matrix memory allocation.
1103 
1104    Notes:
1105    The dense format is fully compatible with standard Fortran 77
1106    storage by columns.
1107 
1108    The data input variable is intended primarily for Fortran programmers
1109    who wish to allocate their own matrix memory space.  Most users should
1110    set data=PETSC_NULL.
1111 
1112    Level: intermediate
1113 
1114 .keywords: matrix,dense, parallel
1115 
1116 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1117 @*/
1118 int MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1119 {
1120   int ierr,(*f)(Mat,PetscScalar *);
1121 
1122   PetscFunctionBegin;
1123   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1124   if (f) {
1125     ierr = (*f)(mat,data);CHKERRQ(ierr);
1126   }
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatCreateMPIDense"
1132 /*@C
1133    MatCreateMPIDense - Creates a sparse parallel matrix in dense format.
1134 
1135    Collective on MPI_Comm
1136 
1137    Input Parameters:
1138 +  comm - MPI communicator
1139 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1140 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1141 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1142 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1143 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1144    to control all matrix memory allocation.
1145 
1146    Output Parameter:
1147 .  A - the matrix
1148 
1149    Notes:
1150    The dense format is fully compatible with standard Fortran 77
1151    storage by columns.
1152 
1153    The data input variable is intended primarily for Fortran programmers
1154    who wish to allocate their own matrix memory space.  Most users should
1155    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1156 
1157    The user MUST specify either the local or global matrix dimensions
1158    (possibly both).
1159 
1160    Level: intermediate
1161 
1162 .keywords: matrix,dense, parallel
1163 
1164 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1165 @*/
1166 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A)
1167 {
1168   int ierr,size;
1169 
1170   PetscFunctionBegin;
1171   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1172   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1173   if (size > 1) {
1174     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1175     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1176   } else {
1177     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1178     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1179   }
1180   PetscFunctionReturn(0);
1181 }
1182 
1183 #undef __FUNCT__
1184 #define __FUNCT__ "MatDuplicate_MPIDense"
/*
   Duplicates a parallel dense matrix with the same layout as A, passing
   cpvalues through to the duplicate of the local block.

   NOTE(review): MatSetType(mat,MATMPIDENSE) runs MatCreate_MPIDense, which
   already allocates mat->data and its rowners table; the PetscNew and
   PetscMalloc below then overwrite those pointers, apparently leaking the
   first allocations -- verify against MatSetType's semantics.
   NOTE(review): unlike MatCreate_MPIDense (which allocates 2*(size+2) ints
   and sets cowners), only (size+1) rowners entries are allocated/copied
   here and a->cowners is never set -- confirm nothing reads cowners of a
   duplicated matrix.
*/
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr);
  ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data         = (void*)a;
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  /* mirror the parallel layout of the original matrix */
  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;
  ierr            = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  /* set up the scatter context for matrix-vector products, then
     duplicate the local dense block */
  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}
1220 
1221 #include "petscsys.h"
1222 
1223 #undef __FUNCT__
1224 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1225 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
1226 {
1227   int          *rowners,i,size,rank,m,ierr,nz,j;
1228   PetscScalar  *array,*vals,*vals_ptr;
1229   MPI_Status   status;
1230 
1231   PetscFunctionBegin;
1232   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1233   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1234 
1235   /* determine ownership of all rows */
1236   m          = M/size + ((M % size) > rank);
1237   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1238   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1239   rowners[0] = 0;
1240   for (i=2; i<=size; i++) {
1241     rowners[i] += rowners[i-1];
1242   }
1243 
1244   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1245   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1246 
1247   if (!rank) {
1248     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1249 
1250     /* read in my part of the matrix numerical values  */
1251     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1252 
1253     /* insert into matrix-by row (this is why cannot directly read into array */
1254     vals_ptr = vals;
1255     for (i=0; i<m; i++) {
1256       for (j=0; j<N; j++) {
1257         array[i + j*m] = *vals_ptr++;
1258       }
1259     }
1260 
1261     /* read in other processors and ship out */
1262     for (i=1; i<size; i++) {
1263       nz   = (rowners[i+1] - rowners[i])*N;
1264       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1265       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1266     }
1267   } else {
1268     /* receive numeric values */
1269     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1270 
1271     /* receive message of values*/
1272     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1273 
1274     /* insert into matrix-by row (this is why cannot directly read into array */
1275     vals_ptr = vals;
1276     for (i=0; i<m; i++) {
1277       for (j=0; j<N; j++) {
1278         array[i + j*m] = *vals_ptr++;
1279       }
1280     }
1281   }
1282   ierr = PetscFree(rowners);CHKERRQ(ierr);
1283   ierr = PetscFree(vals);CHKERRQ(ierr);
1284   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1285   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1286   PetscFunctionReturn(0);
1287 }
1288 
1289 #undef __FUNCT__
1290 #define __FUNCT__ "MatLoad_MPIDense"
/*
   Loads a matrix from a PETSc binary viewer into a parallel dense matrix.
   Process 0 reads the file (header, row lengths, column indices, values)
   and ships each process its portion; the file may hold either a sparse
   (AIJ-style) representation or a flat dense array (detected by the
   MATRIX_BINARY_FORMAT_DENSE marker in the header).

   NOTE(review): the `type` argument is never read in this routine.
   NOTE(review): `fd` is assigned only on rank 0; non-root ranks pass it
   uninitialized (by value) into MatLoad_MPIDense_DenseInFile, which only
   uses it on rank 0 -- confirm this is acceptable.
*/
int MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  PetscScalar  *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;
  int          i,nz,ierr,j,rstart,rend,fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  /* only rank 0 touches the file; the header is broadcast below */
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows; rowners becomes a prefix sum so
     rowners[r] is the first global row of process r */
  m          = M/size + ((M % size) > rank);
  ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors; ourlens and offlens share
     one allocation of 2*(rend-rstart) ints */
  ierr    = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  /* restore ourlens to the full per-row lengths for the insertion loops */
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix, one global row (jj) at a time */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix, one global row (jj) at a time */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1465 
1466 
1467 
1468 
1469 
1470