/*$Id: mpidense.c,v 1.159 2001/08/10 03:30:41 bsmith Exp $*/

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  int          m = A->m,rstart = mdn->rstart,ierr;
  PetscScalar  *array;
  MPI_Comm     comm;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
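  /* the local block is m x N in column-major order, so the m x m diagonal
     block begins at column rstart, i.e. at offset m*rstart into the array */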
  ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
  PetscTruth   roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
  int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  PetscScalar  *av,*bv,*v = lmat->v;
  Mat          newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each
     index set to confirm that it is OK.  Currently supports only submatrices
     with the same row partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  PetscScalar  *val;
  InsertMode   addv=mat->insertmode;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   the local matrix mdn->A directly and not go through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int            *nprocs,j,idx,nsends;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
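  /* Overall communication pattern: each process determines which rows in
     "is" it does not own, sends those row indices to their owners, receives
     the indices of its own rows listed by other processes, and then zeros
     all locally owned rows through the sequential MatZeroRows() below. */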
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
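  /* each process stores complete rows, so gather all of xx locally and
     apply the local dense block to get the owned part of yy */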
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;
  PetscScalar  zero = 0.0;

  PetscFunctionBegin;
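  /* each process applies the transpose of its local row block to its part
     of xx, producing a full-length partial result in lvec; the reverse
     scatter with ADD_VALUES then sums these contributions into yy */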
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
  int          ierr,len,i,n,m = A->m,radd;
  PetscScalar  *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->m,a->A->n);
  radd = a->rstart*m;
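  /* in the column-major local array, the diagonal entry of local row i
     (global column rstart+i) sits at (rstart+i)*m + i = radd + i*m + i */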
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;

#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)   VecDestroy(mdn->lvec);
  if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  int               ierr,size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscTruth        isascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
                   (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    int          M = mat->M,N = mat->N,m,row,i,nz,*cols;
    PetscScalar  *vals;

    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = mdn->A->m;
    for (i=0; i<m; i++) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (isascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  Mat          mdn = mat->A;
  int          ierr;
  PetscReal    isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->M;
  info->columns_global = (double)A->N;
  info->rows_local     = (double)A->m;
  info->columns_local  = (double)A->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_COLUMNS_SORTED:
  case MAT_COLUMNS_UNSORTED:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
    PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  int ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar  *l,*r,x,*v;
  int          ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
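    /* scale local row i: in column-major storage consecutive entries of a
       row are strided by the local row count m */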
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    PetscLogFlops(n*m);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector non-conforming local size");
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    PetscLogFlops(n*m);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  int          ierr,i,j;
  PetscReal    sum = 0.0;
  PetscScalar  *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->n*mdn->A->m; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      PetscLogFlops(2*mdn->A->n*mdn->A->m);
    } else if (type == NORM_1) {
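      /* one norm = max column sum: accumulate local column sums, reduce
         across processes, then take the maximum over all columns */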
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->N;
      ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v = mat->v;
      for (j=0; j<mdn->A->n; j++) {
        for (i=0; i<mdn->A->m; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      for (j=0; j<A->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      PetscLogFlops(A->n*A->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
  Mat          B;
  int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
  int          j,i,ierr;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_SUP,"In-place transpose supported only for square matrices");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

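  /* the local block holds all N columns of the owned rows, so column j of
     the local block supplies this process's contribution to row j of B */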
  m = a->A->m; n = a->A->n; v = Aloc->v;
  ierr = PetscMalloc((m+1)*sizeof(int),&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIDense"
int MatScale_MPIDense(const PetscScalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
  int          one = 1,nz;

  PetscFunctionBegin;
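  /* the local block is a contiguous m x N column-major array, so a single
     BLAS scal call scales every locally stored entry */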
  nz = inA->m*inA->N;
  BLscal_(&nz,(PetscScalar*)alpha,a->v,&one);
  PetscLogFlops(nz);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
int MatSetUpPreallocation_MPIDense(Mat A)
{
  int        ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
/* 4*/ MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
/*15*/ MatGetInfo_MPIDense,
       0,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
/*20*/ MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
/*25*/ MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
/*35*/ MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
/*45*/ 0,
       MatScale_MPIDense,
       0,
       0,
       0,
/*50*/ MatGetBlockSize_MPIDense,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       0,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       MatGetPetscMaps_Petsc,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
       0,
/*85*/ MatLoad_MPIDense};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
int MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense *a;
  int          ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: when data is provided, this assumes the user has correctly
     allocated the local dense storage space.  We should add error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
. -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

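   Example usage (a minimal sketch; the same type can equally be selected at
   runtime with -mat_type mpidense and MatSetFromOptions()):
.vb
      ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
.ve
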
  Level: beginner

.seealso: MatCreateMPIDense
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
int MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;
  int          ierr,i;

  PetscFunctionBegin;
  ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data         = (void*)a;
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = 0;
  mat->mapping      = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
  a->nvec = mat->n;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr       = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
  a->cowners = a->rowners + a->size + 1;
  PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr      = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->cowners[i] += a->cowners[i-1];
  }

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIDense",
                                     MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                     "MatMPIDenseSetPreallocation_MPIDense",
                                     MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
int MatCreate_Dense(Mat A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr);
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

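   Example usage (a minimal sketch; assumes A already has type MATMPIDENSE
   and its dimensions set):
.vb
      ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
.ve
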
   Level: intermediate

.keywords: matrix,dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  int ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

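   Example usage (a minimal sketch; assumes comm, M, and N are defined and
   lets PETSc choose the row and column distribution):
.vb
      Mat A;
      ierr = MatCreateMPIDense(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
      /* ... set entries with MatSetValues(), then assemble ... */
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve
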
   Level: intermediate

.keywords: matrix,dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr);
  ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data         = (void*)a;
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;
  ierr            = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
{
  int          *rowners,i,size,rank,m,ierr,nz,j;
  PetscScalar  *array,*vals,*vals_ptr;
  MPI_Status   status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
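  /* (each of the first M % size processes gets one extra row) */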
  m          = M/size + ((M % size) > rank);
  ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
int MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  PetscScalar  *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;
  int          i,nz,ierr,j,rstart,rend,fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}