xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision be9f9ed3dfd084f007435b442bc709b44f0f745d)
1 /*$Id: mpidense.c,v 1.159 2001/08/10 03:30:41 bsmith Exp $*/
2 
3 /*
4    Basic functions for basic parallel dense matrices.
5 */
6 
7 #include "src/mat/impls/dense/mpi/mpidense.h"
8 #include "src/vec/vecimpl.h"
9 
10 EXTERN_C_BEGIN
11 #undef __FUNCT__
12 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
13 int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
14 {
15   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
16   int          m = A->m,rstart = mdn->rstart,ierr;
17   PetscScalar  *array;
18   MPI_Comm     comm;
19 
20   PetscFunctionBegin;
21   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
22 
23   /* The reuse aspect is not implemented efficiently */
24   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
25 
26   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
27   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
28   ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
29   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
30   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
31   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
32 
33   *iscopy = PETSC_TRUE;
34   PetscFunctionReturn(0);
35 }
36 EXTERN_C_END
37 
38 #undef __FUNCT__
39 #define __FUNCT__ "MatSetValues_MPIDense"
40 int MatSetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],const PetscScalar v[],InsertMode addv)
41 {
42   Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
43   int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
44   PetscTruth   roworiented = A->roworiented;
45 
46   PetscFunctionBegin;
47   for (i=0; i<m; i++) {
48     if (idxm[i] < 0) continue;
49     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
50     if (idxm[i] >= rstart && idxm[i] < rend) {
51       row = idxm[i] - rstart;
52       if (roworiented) {
53         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
54       } else {
55         for (j=0; j<n; j++) {
56           if (idxn[j] < 0) continue;
57           if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
58           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
59         }
60       }
61     } else {
62       if (!A->donotstash) {
63         if (roworiented) {
64           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
65         } else {
66           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
67         }
68       }
69     }
70   }
71   PetscFunctionReturn(0);
72 }
73 
74 #undef __FUNCT__
75 #define __FUNCT__ "MatGetValues_MPIDense"
76 int MatGetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
77 {
78   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
79   int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;
80 
81   PetscFunctionBegin;
82   for (i=0; i<m; i++) {
83     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
84     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
85     if (idxm[i] >= rstart && idxm[i] < rend) {
86       row = idxm[i] - rstart;
87       for (j=0; j<n; j++) {
88         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
89         if (idxn[j] >= mat->N) {
90           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
91         }
92         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
93       }
94     } else {
95       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
96     }
97   }
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatGetArray_MPIDense"
103 int MatGetArray_MPIDense(Mat A,PetscScalar *array[])
104 {
105   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
106   int          ierr;
107 
108   PetscFunctionBegin;
109   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
110   PetscFunctionReturn(0);
111 }
112 
113 #undef __FUNCT__
114 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
115 static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
116 {
117   Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
118   Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
119   int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
120   PetscScalar  *av,*bv,*v = lmat->v;
121   Mat          newmat;
122 
123   PetscFunctionBegin;
124   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
125   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
126   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
127   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
128 
129   /* No parallel redistribution currently supported! Should really check each index set
130      to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
131      original matrix! */
132 
133   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
134   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
135 
136   /* Check submatrix call */
137   if (scall == MAT_REUSE_MATRIX) {
138     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
139     /* Really need to test rows and column sizes! */
140     newmat = *B;
141   } else {
142     /* Create and fill new matrix */
143     ierr = MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);CHKERRQ(ierr);
144   }
145 
146   /* Now extract the data pointers and do the copy, column at a time */
147   newmatd = (Mat_MPIDense*)newmat->data;
148   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
149 
150   for (i=0; i<ncols; i++) {
151     av = v + nlrows*icol[i];
152     for (j=0; j<nrows; j++) {
153       *bv++ = av[irow[j] - rstart];
154     }
155   }
156 
157   /* Assemble the matrices so that the correct flags are set */
158   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
159   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
160 
161   /* Free work space */
162   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
163   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
164   *B = newmat;
165   PetscFunctionReturn(0);
166 }
167 
168 #undef __FUNCT__
169 #define __FUNCT__ "MatRestoreArray_MPIDense"
170 int MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
171 {
172   PetscFunctionBegin;
173   PetscFunctionReturn(0);
174 }
175 
176 #undef __FUNCT__
177 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
178 int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
179 {
180   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
181   MPI_Comm     comm = mat->comm;
182   int          ierr,nstash,reallocs;
183   InsertMode   addv;
184 
185   PetscFunctionBegin;
186   /* make sure all processors are either in INSERTMODE or ADDMODE */
187   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
188   if (addv == (ADD_VALUES|INSERT_VALUES)) {
189     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
190   }
191   mat->insertmode = addv; /* in case this processor had no cache */
192 
193   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
194   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
195   PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
196   PetscFunctionReturn(0);
197 }
198 
199 #undef __FUNCT__
200 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
201 int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
202 {
203   Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
204   int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
205   PetscScalar  *val;
206   InsertMode   addv=mat->insertmode;
207 
208   PetscFunctionBegin;
209   /*  wait on receives */
210   while (1) {
211     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
212     if (!flg) break;
213 
214     for (i=0; i<n;) {
215       /* Now identify the consecutive vals belonging to the same row */
216       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
217       if (j < n) ncols = j-i;
218       else       ncols = n-i;
219       /* Now assemble all these values with a single function call */
220       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
221       i = j;
222     }
223   }
224   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
225 
226   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
227   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
228 
229   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
230     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
231   }
232   PetscFunctionReturn(0);
233 }
234 
235 #undef __FUNCT__
236 #define __FUNCT__ "MatZeroEntries_MPIDense"
237 int MatZeroEntries_MPIDense(Mat A)
238 {
239   int          ierr;
240   Mat_MPIDense *l = (Mat_MPIDense*)A->data;
241 
242   PetscFunctionBegin;
243   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
244   PetscFunctionReturn(0);
245 }
246 
247 #undef __FUNCT__
248 #define __FUNCT__ "MatGetBlockSize_MPIDense"
249 int MatGetBlockSize_MPIDense(Mat A,int *bs)
250 {
251   PetscFunctionBegin;
252   *bs = 1;
253   PetscFunctionReturn(0);
254 }
255 
256 /* the code does not do the diagonal entries correctly unless the
257    matrix is square and the column and row owerships are identical.
258    This is a BUG. The only way to fix it seems to be to access
259    mdn->A and mdn->B directly and not through the MatZeroRows()
260    routine.
261 */
262 #undef __FUNCT__
263 #define __FUNCT__ "MatZeroRows_MPIDense"
264 int MatZeroRows_MPIDense(Mat A,IS is,const PetscScalar *diag)
265 {
266   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
267   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
268   int            *nprocs,j,idx,nsends;
269   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
270   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
271   int            *lens,imdex,*lrows,*values;
272   MPI_Comm       comm = A->comm;
273   MPI_Request    *send_waits,*recv_waits;
274   MPI_Status     recv_status,*send_status;
275   IS             istmp;
276   PetscTruth     found;
277 
278   PetscFunctionBegin;
279   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
280   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
281 
282   /*  first count number of contributors to each processor */
283   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
284   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
285   ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
286   for (i=0; i<N; i++) {
287     idx = rows[i];
288     found = PETSC_FALSE;
289     for (j=0; j<size; j++) {
290       if (idx >= owners[j] && idx < owners[j+1]) {
291         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
292       }
293     }
294     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
295   }
296   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
297 
298   /* inform other processors of number of messages and max length*/
299   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
300 
301   /* post receives:   */
302   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
303   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
304   for (i=0; i<nrecvs; i++) {
305     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
306   }
307 
308   /* do sends:
309       1) starts[i] gives the starting index in svalues for stuff going to
310          the ith processor
311   */
312   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
313   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
314   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
315   starts[0]  = 0;
316   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
317   for (i=0; i<N; i++) {
318     svalues[starts[owner[i]]++] = rows[i];
319   }
320   ISRestoreIndices(is,&rows);
321 
322   starts[0] = 0;
323   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
324   count = 0;
325   for (i=0; i<size; i++) {
326     if (nprocs[2*i+1]) {
327       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
328     }
329   }
330   ierr = PetscFree(starts);CHKERRQ(ierr);
331 
332   base = owners[rank];
333 
334   /*  wait on receives */
335   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
336   source = lens + nrecvs;
337   count  = nrecvs; slen = 0;
338   while (count) {
339     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
340     /* unpack receives into our local space */
341     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
342     source[imdex]  = recv_status.MPI_SOURCE;
343     lens[imdex]  = n;
344     slen += n;
345     count--;
346   }
347   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
348 
349   /* move the data into the send scatter */
350   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
351   count = 0;
352   for (i=0; i<nrecvs; i++) {
353     values = rvalues + i*nmax;
354     for (j=0; j<lens[i]; j++) {
355       lrows[count++] = values[j] - base;
356     }
357   }
358   ierr = PetscFree(rvalues);CHKERRQ(ierr);
359   ierr = PetscFree(lens);CHKERRQ(ierr);
360   ierr = PetscFree(owner);CHKERRQ(ierr);
361   ierr = PetscFree(nprocs);CHKERRQ(ierr);
362 
363   /* actually zap the local rows */
364   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
365   PetscLogObjectParent(A,istmp);
366   ierr = PetscFree(lrows);CHKERRQ(ierr);
367   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
368   ierr = ISDestroy(istmp);CHKERRQ(ierr);
369 
370   /* wait on sends */
371   if (nsends) {
372     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
373     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
374     ierr = PetscFree(send_status);CHKERRQ(ierr);
375   }
376   ierr = PetscFree(send_waits);CHKERRQ(ierr);
377   ierr = PetscFree(svalues);CHKERRQ(ierr);
378 
379   PetscFunctionReturn(0);
380 }
381 
382 #undef __FUNCT__
383 #define __FUNCT__ "MatMult_MPIDense"
384 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
385 {
386   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
387   int          ierr;
388 
389   PetscFunctionBegin;
390   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
391   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
392   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
393   PetscFunctionReturn(0);
394 }
395 
396 #undef __FUNCT__
397 #define __FUNCT__ "MatMultAdd_MPIDense"
398 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
399 {
400   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
401   int          ierr;
402 
403   PetscFunctionBegin;
404   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
405   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
406   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
407   PetscFunctionReturn(0);
408 }
409 
410 #undef __FUNCT__
411 #define __FUNCT__ "MatMultTranspose_MPIDense"
412 int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
413 {
414   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
415   int          ierr;
416   PetscScalar  zero = 0.0;
417 
418   PetscFunctionBegin;
419   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
420   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
421   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
422   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
423   PetscFunctionReturn(0);
424 }
425 
426 #undef __FUNCT__
427 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
428 int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
429 {
430   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
431   int          ierr;
432 
433   PetscFunctionBegin;
434   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
435   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
436   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
437   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
438   PetscFunctionReturn(0);
439 }
440 
441 #undef __FUNCT__
442 #define __FUNCT__ "MatGetDiagonal_MPIDense"
443 int MatGetDiagonal_MPIDense(Mat A,Vec v)
444 {
445   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
446   Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
447   int          ierr,len,i,n,m = A->m,radd;
448   PetscScalar  *x,zero = 0.0;
449 
450   PetscFunctionBegin;
451   ierr = VecSet(&zero,v);CHKERRQ(ierr);
452   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
453   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
454   if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
455   len  = PetscMin(a->A->m,a->A->n);
456   radd = a->rstart*m;
457   for (i=0; i<len; i++) {
458     x[i] = aloc->v[radd + i*m + i];
459   }
460   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
461   PetscFunctionReturn(0);
462 }
463 
464 #undef __FUNCT__
465 #define __FUNCT__ "MatDestroy_MPIDense"
466 int MatDestroy_MPIDense(Mat mat)
467 {
468   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
469   int          ierr;
470 
471   PetscFunctionBegin;
472 
473 #if defined(PETSC_USE_LOG)
474   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
475 #endif
476   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
477   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
478   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
479   if (mdn->lvec)   VecDestroy(mdn->lvec);
480   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
481   if (mdn->factor) {
482     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
483     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
484     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
485     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
486   }
487   ierr = PetscFree(mdn);CHKERRQ(ierr);
488   PetscFunctionReturn(0);
489 }
490 
491 #undef __FUNCT__
492 #define __FUNCT__ "MatView_MPIDense_Binary"
493 static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
494 {
495   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
496   int          ierr;
497 
498   PetscFunctionBegin;
499   if (mdn->size == 1) {
500     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
501   }
502   else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
503   PetscFunctionReturn(0);
504 }
505 
506 #undef __FUNCT__
507 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
508 static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
509 {
510   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
511   int               ierr,size = mdn->size,rank = mdn->rank;
512   PetscViewerType   vtype;
513   PetscTruth        isascii,isdraw;
514   PetscViewer       sviewer;
515   PetscViewerFormat format;
516 
517   PetscFunctionBegin;
518   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
519   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
520   if (isascii) {
521     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
522     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
523     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
524       MatInfo info;
525       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
526       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
527                    (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
528       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
529       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
530       PetscFunctionReturn(0);
531     } else if (format == PETSC_VIEWER_ASCII_INFO) {
532       PetscFunctionReturn(0);
533     }
534   } else if (isdraw) {
535     PetscDraw       draw;
536     PetscTruth isnull;
537 
538     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
539     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
540     if (isnull) PetscFunctionReturn(0);
541   }
542 
543   if (size == 1) {
544     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
545   } else {
546     /* assemble the entire matrix onto first processor. */
547     Mat          A;
548     int          M = mat->M,N = mat->N,m,row,i,nz,*cols;
549     PetscScalar  *vals;
550 
551     if (!rank) {
552       ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
553     } else {
554       ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
555     }
556     PetscLogObjectParent(mat,A);
557 
558     /* Copy the matrix ... This isn't the most efficient means,
559        but it's quick for now */
560     row = mdn->rstart; m = mdn->A->m;
561     for (i=0; i<m; i++) {
562       ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
563       ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
564       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
565       row++;
566     }
567 
568     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
569     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
570     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
571     if (!rank) {
572       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
573     }
574     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
575     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
576     ierr = MatDestroy(A);CHKERRQ(ierr);
577   }
578   PetscFunctionReturn(0);
579 }
580 
581 #undef __FUNCT__
582 #define __FUNCT__ "MatView_MPIDense"
583 int MatView_MPIDense(Mat mat,PetscViewer viewer)
584 {
585   int        ierr;
586   PetscTruth isascii,isbinary,isdraw,issocket;
587 
588   PetscFunctionBegin;
589 
590   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
591   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
592   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
593   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
594 
595   if (isascii || issocket || isdraw) {
596     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
597   } else if (isbinary) {
598     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
599   } else {
600     SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
601   }
602   PetscFunctionReturn(0);
603 }
604 
605 #undef __FUNCT__
606 #define __FUNCT__ "MatGetInfo_MPIDense"
607 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
608 {
609   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
610   Mat          mdn = mat->A;
611   int          ierr;
612   PetscReal    isend[5],irecv[5];
613 
614   PetscFunctionBegin;
615   info->rows_global    = (double)A->M;
616   info->columns_global = (double)A->N;
617   info->rows_local     = (double)A->m;
618   info->columns_local  = (double)A->N;
619   info->block_size     = 1.0;
620   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
621   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
622   isend[3] = info->memory;  isend[4] = info->mallocs;
623   if (flag == MAT_LOCAL) {
624     info->nz_used      = isend[0];
625     info->nz_allocated = isend[1];
626     info->nz_unneeded  = isend[2];
627     info->memory       = isend[3];
628     info->mallocs      = isend[4];
629   } else if (flag == MAT_GLOBAL_MAX) {
630     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
631     info->nz_used      = irecv[0];
632     info->nz_allocated = irecv[1];
633     info->nz_unneeded  = irecv[2];
634     info->memory       = irecv[3];
635     info->mallocs      = irecv[4];
636   } else if (flag == MAT_GLOBAL_SUM) {
637     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
638     info->nz_used      = irecv[0];
639     info->nz_allocated = irecv[1];
640     info->nz_unneeded  = irecv[2];
641     info->memory       = irecv[3];
642     info->mallocs      = irecv[4];
643   }
644   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
645   info->fill_ratio_needed = 0;
646   info->factor_mallocs    = 0;
647   PetscFunctionReturn(0);
648 }
649 
650 #undef __FUNCT__
651 #define __FUNCT__ "MatSetOption_MPIDense"
652 int MatSetOption_MPIDense(Mat A,MatOption op)
653 {
654   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
655   int          ierr;
656 
657   PetscFunctionBegin;
658   switch (op) {
659   case MAT_NO_NEW_NONZERO_LOCATIONS:
660   case MAT_YES_NEW_NONZERO_LOCATIONS:
661   case MAT_NEW_NONZERO_LOCATION_ERR:
662   case MAT_NEW_NONZERO_ALLOCATION_ERR:
663   case MAT_COLUMNS_SORTED:
664   case MAT_COLUMNS_UNSORTED:
665     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
666     break;
667   case MAT_ROW_ORIENTED:
668     a->roworiented = PETSC_TRUE;
669     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
670     break;
671   case MAT_ROWS_SORTED:
672   case MAT_ROWS_UNSORTED:
673   case MAT_YES_NEW_DIAGONALS:
674   case MAT_USE_HASH_TABLE:
675     PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
676     break;
677   case MAT_COLUMN_ORIENTED:
678     a->roworiented = PETSC_FALSE;
679     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
680     break;
681   case MAT_IGNORE_OFF_PROC_ENTRIES:
682     a->donotstash = PETSC_TRUE;
683     break;
684   case MAT_NO_NEW_DIAGONALS:
685     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
686   default:
687     SETERRQ(PETSC_ERR_SUP,"unknown option");
688   }
689   PetscFunctionReturn(0);
690 }
691 
692 #undef __FUNCT__
693 #define __FUNCT__ "MatGetRow_MPIDense"
694 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v)
695 {
696   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
697   int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;
698 
699   PetscFunctionBegin;
700   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows")
701   lrow = row - rstart;
702   ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
703   PetscFunctionReturn(0);
704 }
705 
706 #undef __FUNCT__
707 #define __FUNCT__ "MatRestoreRow_MPIDense"
708 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
709 {
710   int ierr;
711 
712   PetscFunctionBegin;
713   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
714   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
715   PetscFunctionReturn(0);
716 }
717 
718 #undef __FUNCT__
719 #define __FUNCT__ "MatDiagonalScale_MPIDense"
720 int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
721 {
722   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
723   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
724   PetscScalar  *l,*r,x,*v;
725   int          ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;
726 
727   PetscFunctionBegin;
728   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
729   if (ll) {
730     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
731     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
732     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
733     for (i=0; i<m; i++) {
734       x = l[i];
735       v = mat->v + i;
736       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
737     }
738     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
739     PetscLogFlops(n*m);
740   }
741   if (rr) {
742     ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
743     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
744     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
745     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
746     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
747     for (i=0; i<n; i++) {
748       x = r[i];
749       v = mat->v + i*m;
750       for (j=0; j<m; j++) { (*v++) *= x;}
751     }
752     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
753     PetscLogFlops(n*m);
754   }
755   PetscFunctionReturn(0);
756 }
757 
758 #undef __FUNCT__
759 #define __FUNCT__ "MatNorm_MPIDense"
760 int MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
761 {
762   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
763   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
764   int          ierr,i,j;
765   PetscReal    sum = 0.0;
766   PetscScalar  *v = mat->v;
767 
768   PetscFunctionBegin;
769   if (mdn->size == 1) {
770     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
771   } else {
772     if (type == NORM_FROBENIUS) {
773       for (i=0; i<mdn->A->n*mdn->A->m; i++) {
774 #if defined(PETSC_USE_COMPLEX)
775         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
776 #else
777         sum += (*v)*(*v); v++;
778 #endif
779       }
780       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
781       *nrm = sqrt(*nrm);
782       PetscLogFlops(2*mdn->A->n*mdn->A->m);
783     } else if (type == NORM_1) {
784       PetscReal *tmp,*tmp2;
785       ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
786       tmp2 = tmp + A->N;
787       ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
788       *nrm = 0.0;
789       v = mat->v;
790       for (j=0; j<mdn->A->n; j++) {
791         for (i=0; i<mdn->A->m; i++) {
792           tmp[j] += PetscAbsScalar(*v);  v++;
793         }
794       }
795       ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
796       for (j=0; j<A->N; j++) {
797         if (tmp2[j] > *nrm) *nrm = tmp2[j];
798       }
799       ierr = PetscFree(tmp);CHKERRQ(ierr);
800       PetscLogFlops(A->n*A->m);
801     } else if (type == NORM_INFINITY) { /* max row norm */
802       PetscReal ntemp;
803       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
804       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
805     } else {
806       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
807     }
808   }
809   PetscFunctionReturn(0);
810 }
811 
812 #undef __FUNCT__
813 #define __FUNCT__ "MatTranspose_MPIDense"
814 int MatTranspose_MPIDense(Mat A,Mat *matout)
815 {
816   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
817   Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
818   Mat          B;
819   int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
820   int          j,i,ierr;
821   PetscScalar  *v;
822 
823   PetscFunctionBegin;
824   if (!matout && M != N) {
825     SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
826   }
827   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);
828 
829   m = a->A->m; n = a->A->n; v = Aloc->v;
830   ierr = PetscMalloc(n*sizeof(int),&rwork);CHKERRQ(ierr);
831   for (j=0; j<n; j++) {
832     for (i=0; i<m; i++) rwork[i] = rstart + i;
833     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
834     v   += m;
835   }
836   ierr = PetscFree(rwork);CHKERRQ(ierr);
837   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
838   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
839   if (matout) {
840     *matout = B;
841   } else {
842     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
843   }
844   PetscFunctionReturn(0);
845 }
846 
847 #include "petscblaslapack.h"
848 #undef __FUNCT__
849 #define __FUNCT__ "MatScale_MPIDense"
850 int MatScale_MPIDense(const PetscScalar *alpha,Mat inA)
851 {
852   Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
853   Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
854   int          one = 1,nz;
855 
856   PetscFunctionBegin;
857   nz = inA->m*inA->N;
858   BLscal_(&nz,(PetscScalar*)alpha,a->v,&one);
859   PetscLogFlops(nz);
860   PetscFunctionReturn(0);
861 }
862 
863 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
864 
865 #undef __FUNCT__
866 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
867 int MatSetUpPreallocation_MPIDense(Mat A)
868 {
869   int        ierr;
870 
871   PetscFunctionBegin;
872   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
873   PetscFunctionReturn(0);
874 }
875 
876 /* -------------------------------------------------------------------*/
877 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
878        MatGetRow_MPIDense,
879        MatRestoreRow_MPIDense,
880        MatMult_MPIDense,
881 /* 4*/ MatMultAdd_MPIDense,
882        MatMultTranspose_MPIDense,
883        MatMultTransposeAdd_MPIDense,
884        0,
885        0,
886        0,
887 /*10*/ 0,
888        0,
889        0,
890        0,
891        MatTranspose_MPIDense,
892 /*15*/ MatGetInfo_MPIDense,
893        0,
894        MatGetDiagonal_MPIDense,
895        MatDiagonalScale_MPIDense,
896        MatNorm_MPIDense,
897 /*20*/ MatAssemblyBegin_MPIDense,
898        MatAssemblyEnd_MPIDense,
899        0,
900        MatSetOption_MPIDense,
901        MatZeroEntries_MPIDense,
902 /*25*/ MatZeroRows_MPIDense,
903        0,
904        0,
905        0,
906        0,
907 /*30*/ MatSetUpPreallocation_MPIDense,
908        0,
909        0,
910        MatGetArray_MPIDense,
911        MatRestoreArray_MPIDense,
912 /*35*/ MatDuplicate_MPIDense,
913        0,
914        0,
915        0,
916        0,
917 /*40*/ 0,
918        MatGetSubMatrices_MPIDense,
919        0,
920        MatGetValues_MPIDense,
921        0,
922 /*45*/ 0,
923        MatScale_MPIDense,
924        0,
925        0,
926        0,
927 /*50*/ MatGetBlockSize_MPIDense,
928        0,
929        0,
930        0,
931        0,
932 /*55*/ 0,
933        0,
934        0,
935        0,
936        0,
937 /*60*/ MatGetSubMatrix_MPIDense,
938        MatDestroy_MPIDense,
939        MatView_MPIDense,
940        MatGetPetscMaps_Petsc,
941        0,
942 /*65*/ 0,
943        0,
944        0,
945        0,
946        0,
947 /*70*/ 0,
948        0,
949        0,
950        0,
951        0,
952 /*75*/ 0,
953        0,
954        0,
955        0,
956        0,
957 /*80*/ 0,
958        0,
959        0,
960        0,
961        0,
962 /*85*/ MatLoad_MPIDense};
963 
964 EXTERN_C_BEGIN
965 #undef __FUNCT__
966 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
967 int MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
968 {
969   Mat_MPIDense *a;
970   int          ierr;
971 
972   PetscFunctionBegin;
973   mat->preallocated = PETSC_TRUE;
974   /* Note:  For now, when data is specified above, this assumes the user correctly
975    allocates the local dense storage space.  We should add error checking. */
976 
977   a    = (Mat_MPIDense*)mat->data;
978   ierr = MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);CHKERRQ(ierr);
979   PetscLogObjectParent(mat,a->A);
980   PetscFunctionReturn(0);
981 }
982 EXTERN_C_END
983 
984 /*MC
985    MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
986 
987    Options Database Keys:
988 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
989 
990   Level: beginner
991 
992 .seealso: MatCreateMPIDense
993 M*/
994 
995 EXTERN_C_BEGIN
996 #undef __FUNCT__
997 #define __FUNCT__ "MatCreate_MPIDense"
998 int MatCreate_MPIDense(Mat mat)
999 {
1000   Mat_MPIDense *a;
1001   int          ierr,i;
1002 
1003   PetscFunctionBegin;
1004   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1005   mat->data         = (void*)a;
1006   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1007   mat->factor       = 0;
1008   mat->mapping      = 0;
1009 
1010   a->factor       = 0;
1011   mat->insertmode = NOT_SET_VALUES;
1012   ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
1013   ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);
1014 
1015   ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
1016   ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
1017   a->nvec = mat->n;
1018 
1019   /* the information in the maps duplicates the information computed below, eventually
1020      we should remove the duplicate information that is not contained in the maps */
1021   ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
1022   ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);
1023 
1024   /* build local table of row and column ownerships */
1025   ierr       = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
1026   a->cowners = a->rowners + a->size + 1;
1027   PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1028   ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
1029   a->rowners[0] = 0;
1030   for (i=2; i<=a->size; i++) {
1031     a->rowners[i] += a->rowners[i-1];
1032   }
1033   a->rstart = a->rowners[a->rank];
1034   a->rend   = a->rowners[a->rank+1];
1035   ierr      = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
1036   a->cowners[0] = 0;
1037   for (i=2; i<=a->size; i++) {
1038     a->cowners[i] += a->cowners[i-1];
1039   }
1040 
1041   /* build cache for off array entries formed */
1042   a->donotstash = PETSC_FALSE;
1043   ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);
1044 
1045   /* stuff used for matrix vector multiply */
1046   a->lvec        = 0;
1047   a->Mvctx       = 0;
1048   a->roworiented = PETSC_TRUE;
1049 
1050   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1051                                      "MatGetDiagonalBlock_MPIDense",
1052                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1053   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1054                                      "MatMPIDenseSetPreallocation_MPIDense",
1055                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1056   PetscFunctionReturn(0);
1057 }
1058 EXTERN_C_END
1059 
1060 #undef __FUNCT__
1061 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1062 /*@C
1063    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1064 
1065    Not collective
1066 
1067    Input Parameters:
1068 .  A - the matrix
1069 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1070    to control all matrix memory allocation.
1071 
1072    Notes:
1073    The dense format is fully compatible with standard Fortran 77
1074    storage by columns.
1075 
1076    The data input variable is intended primarily for Fortran programmers
1077    who wish to allocate their own matrix memory space.  Most users should
1078    set data=PETSC_NULL.
1079 
1080    Level: intermediate
1081 
1082 .keywords: matrix,dense, parallel
1083 
1084 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1085 @*/
1086 int MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1087 {
1088   int ierr,(*f)(Mat,PetscScalar *);
1089 
1090   PetscFunctionBegin;
1091   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1092   if (f) {
1093     ierr = (*f)(mat,data);CHKERRQ(ierr);
1094   }
1095   PetscFunctionReturn(0);
1096 }
1097 
1098 #undef __FUNCT__
1099 #define __FUNCT__ "MatCreateMPIDense"
1100 /*@C
1101    MatCreateMPIDense - Creates a sparse parallel matrix in dense format.
1102 
1103    Collective on MPI_Comm
1104 
1105    Input Parameters:
1106 +  comm - MPI communicator
1107 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1108 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1109 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1110 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1111 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1112    to control all matrix memory allocation.
1113 
1114    Output Parameter:
1115 .  A - the matrix
1116 
1117    Notes:
1118    The dense format is fully compatible with standard Fortran 77
1119    storage by columns.
1120 
1121    The data input variable is intended primarily for Fortran programmers
1122    who wish to allocate their own matrix memory space.  Most users should
1123    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1124 
1125    The user MUST specify either the local or global matrix dimensions
1126    (possibly both).
1127 
1128    Level: intermediate
1129 
1130 .keywords: matrix,dense, parallel
1131 
1132 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1133 @*/
1134 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A)
1135 {
1136   int ierr,size;
1137 
1138   PetscFunctionBegin;
1139   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1140   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1141   if (size > 1) {
1142     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1143     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1144   } else {
1145     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1146     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1147   }
1148   PetscFunctionReturn(0);
1149 }
1150 
1151 #undef __FUNCT__
1152 #define __FUNCT__ "MatDuplicate_MPIDense"
1153 static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1154 {
1155   Mat          mat;
1156   Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1157   int          ierr;
1158 
1159   PetscFunctionBegin;
1160   *newmat       = 0;
1161   ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
1162   ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr);
1163   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1164   mat->data         = (void*)a;
1165   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1166   mat->factor       = A->factor;
1167   mat->assembled    = PETSC_TRUE;
1168   mat->preallocated = PETSC_TRUE;
1169 
1170   a->rstart       = oldmat->rstart;
1171   a->rend         = oldmat->rend;
1172   a->size         = oldmat->size;
1173   a->rank         = oldmat->rank;
1174   mat->insertmode = NOT_SET_VALUES;
1175   a->nvec         = oldmat->nvec;
1176   a->donotstash   = oldmat->donotstash;
1177   ierr            = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);CHKERRQ(ierr);
1178   PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1179   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
1180   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1181 
1182   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1183   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1184   PetscLogObjectParent(mat,a->A);
1185   *newmat = mat;
1186   PetscFunctionReturn(0);
1187 }
1188 
1189 #include "petscsys.h"
1190 
1191 #undef __FUNCT__
1192 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1193 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
1194 {
1195   int          *rowners,i,size,rank,m,ierr,nz,j;
1196   PetscScalar  *array,*vals,*vals_ptr;
1197   MPI_Status   status;
1198 
1199   PetscFunctionBegin;
1200   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1201   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1202 
1203   /* determine ownership of all rows */
1204   m          = M/size + ((M % size) > rank);
1205   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1206   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1207   rowners[0] = 0;
1208   for (i=2; i<=size; i++) {
1209     rowners[i] += rowners[i-1];
1210   }
1211 
1212   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1213   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1214 
1215   if (!rank) {
1216     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1217 
1218     /* read in my part of the matrix numerical values  */
1219     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1220 
1221     /* insert into matrix-by row (this is why cannot directly read into array */
1222     vals_ptr = vals;
1223     for (i=0; i<m; i++) {
1224       for (j=0; j<N; j++) {
1225         array[i + j*m] = *vals_ptr++;
1226       }
1227     }
1228 
1229     /* read in other processors and ship out */
1230     for (i=1; i<size; i++) {
1231       nz   = (rowners[i+1] - rowners[i])*N;
1232       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1233       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1234     }
1235   } else {
1236     /* receive numeric values */
1237     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1238 
1239     /* receive message of values*/
1240     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1241 
1242     /* insert into matrix-by row (this is why cannot directly read into array */
1243     vals_ptr = vals;
1244     for (i=0; i<m; i++) {
1245       for (j=0; j<N; j++) {
1246         array[i + j*m] = *vals_ptr++;
1247       }
1248     }
1249   }
1250   ierr = PetscFree(rowners);CHKERRQ(ierr);
1251   ierr = PetscFree(vals);CHKERRQ(ierr);
1252   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1253   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1254   PetscFunctionReturn(0);
1255 }
1256 
1257 #undef __FUNCT__
1258 #define __FUNCT__ "MatLoad_MPIDense"
1259 int MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
1260 {
1261   Mat          A;
1262   PetscScalar  *vals,*svals;
1263   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1264   MPI_Status   status;
1265   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1266   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1267   int          tag = ((PetscObject)viewer)->tag;
1268   int          i,nz,ierr,j,rstart,rend,fd;
1269 
1270   PetscFunctionBegin;
1271   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1272   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1273   if (!rank) {
1274     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1275     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1276     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1277   }
1278 
1279   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1280   M = header[1]; N = header[2]; nz = header[3];
1281 
1282   /*
1283        Handle case where matrix is stored on disk as a dense matrix
1284   */
1285   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1286     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1287     PetscFunctionReturn(0);
1288   }
1289 
1290   /* determine ownership of all rows */
1291   m          = M/size + ((M % size) > rank);
1292   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1293   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1294   rowners[0] = 0;
1295   for (i=2; i<=size; i++) {
1296     rowners[i] += rowners[i-1];
1297   }
1298   rstart = rowners[rank];
1299   rend   = rowners[rank+1];
1300 
1301   /* distribute row lengths to all processors */
1302   ierr    = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);CHKERRQ(ierr);
1303   offlens = ourlens + (rend-rstart);
1304   if (!rank) {
1305     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1306     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1307     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1308     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1309     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1310     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1311   } else {
1312     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1313   }
1314 
1315   if (!rank) {
1316     /* calculate the number of nonzeros on each processor */
1317     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1318     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1319     for (i=0; i<size; i++) {
1320       for (j=rowners[i]; j< rowners[i+1]; j++) {
1321         procsnz[i] += rowlengths[j];
1322       }
1323     }
1324     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1325 
1326     /* determine max buffer needed and allocate it */
1327     maxnz = 0;
1328     for (i=0; i<size; i++) {
1329       maxnz = PetscMax(maxnz,procsnz[i]);
1330     }
1331     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1332 
1333     /* read in my part of the matrix column indices  */
1334     nz = procsnz[0];
1335     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1336     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1337 
1338     /* read in every one elses and ship off */
1339     for (i=1; i<size; i++) {
1340       nz   = procsnz[i];
1341       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1342       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1343     }
1344     ierr = PetscFree(cols);CHKERRQ(ierr);
1345   } else {
1346     /* determine buffer space needed for message */
1347     nz = 0;
1348     for (i=0; i<m; i++) {
1349       nz += ourlens[i];
1350     }
1351     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1352 
1353     /* receive message of column indices*/
1354     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1355     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1356     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1357   }
1358 
1359   /* loop over local rows, determining number of off diagonal entries */
1360   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1361   jj = 0;
1362   for (i=0; i<m; i++) {
1363     for (j=0; j<ourlens[i]; j++) {
1364       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1365       jj++;
1366     }
1367   }
1368 
1369   /* create our matrix */
1370   for (i=0; i<m; i++) {
1371     ourlens[i] -= offlens[i];
1372   }
1373   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1374   A = *newmat;
1375   for (i=0; i<m; i++) {
1376     ourlens[i] += offlens[i];
1377   }
1378 
1379   if (!rank) {
1380     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1381 
1382     /* read in my part of the matrix numerical values  */
1383     nz = procsnz[0];
1384     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1385 
1386     /* insert into matrix */
1387     jj      = rstart;
1388     smycols = mycols;
1389     svals   = vals;
1390     for (i=0; i<m; i++) {
1391       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1392       smycols += ourlens[i];
1393       svals   += ourlens[i];
1394       jj++;
1395     }
1396 
1397     /* read in other processors and ship out */
1398     for (i=1; i<size; i++) {
1399       nz   = procsnz[i];
1400       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1401       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1402     }
1403     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1404   } else {
1405     /* receive numeric values */
1406     ierr = PetscMalloc(nz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1407 
1408     /* receive message of values*/
1409     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1410     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1411     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1412 
1413     /* insert into matrix */
1414     jj      = rstart;
1415     smycols = mycols;
1416     svals   = vals;
1417     for (i=0; i<m; i++) {
1418       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1419       smycols += ourlens[i];
1420       svals   += ourlens[i];
1421       jj++;
1422     }
1423   }
1424   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1425   ierr = PetscFree(vals);CHKERRQ(ierr);
1426   ierr = PetscFree(mycols);CHKERRQ(ierr);
1427   ierr = PetscFree(rowners);CHKERRQ(ierr);
1428 
1429   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1430   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1431   PetscFunctionReturn(0);
1432 }
1433 
1434 
1435 
1436 
1437 
1438