1 /*$Id: mpidense.c,v 1.159 2001/08/10 03:30:41 bsmith Exp $*/
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 #include "src/mat/impls/dense/mpi/mpidense.h"
8 
9 EXTERN_C_BEGIN
10 #undef __FUNCT__
11 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
12 int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
13 {
14   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
15   int          m = A->m,rstart = mdn->rstart,ierr;
16   PetscScalar  *array;
17   MPI_Comm     comm;
18 
19   PetscFunctionBegin;
20   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
21 
22   /* The reuse aspect is not implemented efficiently */
23   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
24 
25   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
26   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
27   ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
28   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
29   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
30   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
31 
32   *iscopy = PETSC_TRUE;
33   PetscFunctionReturn(0);
34 }
35 EXTERN_C_END
36 
37 #undef __FUNCT__
38 #define __FUNCT__ "MatSetValues_MPIDense"
39 int MatSetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],const PetscScalar v[],InsertMode addv)
40 {
41   Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
42   int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
43   PetscTruth   roworiented = A->roworiented;
44 
45   PetscFunctionBegin;
46   for (i=0; i<m; i++) {
47     if (idxm[i] < 0) continue;
48     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
49     if (idxm[i] >= rstart && idxm[i] < rend) {
50       row = idxm[i] - rstart;
51       if (roworiented) {
52         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
53       } else {
54         for (j=0; j<n; j++) {
55           if (idxn[j] < 0) continue;
56           if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
57           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
58         }
59       }
60     } else {
61       if (!A->donotstash) {
62         if (roworiented) {
63           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
64         } else {
65           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
66         }
67       }
68     }
69   }
70   PetscFunctionReturn(0);
71 }
72 
73 #undef __FUNCT__
74 #define __FUNCT__ "MatGetValues_MPIDense"
75 int MatGetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
76 {
77   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
78   int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;
79 
80   PetscFunctionBegin;
81   for (i=0; i<m; i++) {
82     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
83     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
84     if (idxm[i] >= rstart && idxm[i] < rend) {
85       row = idxm[i] - rstart;
86       for (j=0; j<n; j++) {
87         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
88         if (idxn[j] >= mat->N) {
89           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
90         }
91         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
92       }
93     } else {
94       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
95     }
96   }
97   PetscFunctionReturn(0);
98 }
99 
100 #undef __FUNCT__
101 #define __FUNCT__ "MatGetArray_MPIDense"
102 int MatGetArray_MPIDense(Mat A,PetscScalar *array[])
103 {
104   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
105   int          ierr;
106 
107   PetscFunctionBegin;
108   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
109   PetscFunctionReturn(0);
110 }
111 
112 #undef __FUNCT__
113 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
114 static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
115 {
116   Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
117   Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
118   int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
119   PetscScalar  *av,*bv,*v = lmat->v;
120   Mat          newmat;
121 
122   PetscFunctionBegin;
123   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
124   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
125   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
126   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
127 
128   /* No parallel redistribution currently supported! Should really check each index set
129      to confirm that it is OK.  ... Currently supports only a submatrix with the same
130      partitioning as the original matrix! */
131 
132   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
133   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
134 
135   /* Check submatrix call */
136   if (scall == MAT_REUSE_MATRIX) {
137     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
138     /* Really need to test the row and column sizes! */
139     newmat = *B;
140   } else {
141     /* Create and fill new matrix */
142     ierr = MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);CHKERRQ(ierr);
143   }
144 
145   /* Now extract the data pointers and do the copy, column at a time */
146   newmatd = (Mat_MPIDense*)newmat->data;
147   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
148 
149   for (i=0; i<ncols; i++) {
150     av = v + nlrows*icol[i];
151     for (j=0; j<nrows; j++) {
152       *bv++ = av[irow[j] - rstart];
153     }
154   }
155 
156   /* Assemble the matrices so that the correct flags are set */
157   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
158   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
159 
160   /* Free work space */
161   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
162   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
163   *B = newmat;
164   PetscFunctionReturn(0);
165 }
166 
167 #undef __FUNCT__
168 #define __FUNCT__ "MatRestoreArray_MPIDense"
169 int MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
170 {
171   PetscFunctionBegin;
172   PetscFunctionReturn(0);
173 }
174 
175 #undef __FUNCT__
176 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
177 int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
178 {
179   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
180   MPI_Comm     comm = mat->comm;
181   int          ierr,nstash,reallocs;
182   InsertMode   addv;
183 
184   PetscFunctionBegin;
185   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
186   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
187   if (addv == (ADD_VALUES|INSERT_VALUES)) {
188     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
189   }
190   mat->insertmode = addv; /* in case this processor had no cache */
191 
192   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
193   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
194   PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
195   PetscFunctionReturn(0);
196 }
197 
198 #undef __FUNCT__
199 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
200 int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
201 {
202   Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
203   int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
204   PetscScalar  *val;
205   InsertMode   addv=mat->insertmode;
206 
207   PetscFunctionBegin;
208   /*  wait on receives */
209   while (1) {
210     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
211     if (!flg) break;
212 
213     for (i=0; i<n;) {
214       /* Now identify the consecutive vals belonging to the same row */
215       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
216       if (j < n) ncols = j-i;
217       else       ncols = n-i;
218       /* Now assemble all these values with a single function call */
219       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
220       i = j;
221     }
222   }
223   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
224 
225   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
226   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
227 
228   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
229     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
230   }
231   PetscFunctionReturn(0);
232 }
233 
234 #undef __FUNCT__
235 #define __FUNCT__ "MatZeroEntries_MPIDense"
236 int MatZeroEntries_MPIDense(Mat A)
237 {
238   int          ierr;
239   Mat_MPIDense *l = (Mat_MPIDense*)A->data;
240 
241   PetscFunctionBegin;
242   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
243   PetscFunctionReturn(0);
244 }
245 
246 #undef __FUNCT__
247 #define __FUNCT__ "MatGetBlockSize_MPIDense"
248 int MatGetBlockSize_MPIDense(Mat A,int *bs)
249 {
250   PetscFunctionBegin;
251   *bs = 1;
252   PetscFunctionReturn(0);
253 }
254 
255 /* the code does not do the diagonal entries correctly unless the
256    matrix is square and the column and row ownerships are identical.
257    This is a BUG. The only way to fix it seems to be to access
258    mdn->A and mdn->B directly and not through the MatZeroRows()
259    routine.
260 */
261 #undef __FUNCT__
262 #define __FUNCT__ "MatZeroRows_MPIDense"
263 int MatZeroRows_MPIDense(Mat A,IS is,const PetscScalar *diag)
264 {
265   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
266   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
267   int            *nprocs,j,idx,nsends;
268   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
269   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
270   int            *lens,imdex,*lrows,*values;
271   MPI_Comm       comm = A->comm;
272   MPI_Request    *send_waits,*recv_waits;
273   MPI_Status     recv_status,*send_status;
274   IS             istmp;
275   PetscTruth     found;
276 
277   PetscFunctionBegin;
278   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
279   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
280 
281   /*  first count number of contributors to each processor */
282   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
283   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
284   ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
285   for (i=0; i<N; i++) {
286     idx = rows[i];
287     found = PETSC_FALSE;
288     for (j=0; j<size; j++) {
289       if (idx >= owners[j] && idx < owners[j+1]) {
290         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
291       }
292     }
293     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
294   }
295   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
296 
297   /* inform other processors of number of messages and max length*/
298   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
299 
300   /* post receives:   */
301   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
302   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
303   for (i=0; i<nrecvs; i++) {
304     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
305   }
306 
307   /* do sends:
308       1) starts[i] gives the starting index in svalues for stuff going to
309          the ith processor
310   */
311   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
312   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
313   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
314   starts[0]  = 0;
315   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
316   for (i=0; i<N; i++) {
317     svalues[starts[owner[i]]++] = rows[i];
318   }
319   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
320 
321   starts[0] = 0;
322   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
323   count = 0;
324   for (i=0; i<size; i++) {
325     if (nprocs[2*i+1]) {
326       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
327     }
328   }
329   ierr = PetscFree(starts);CHKERRQ(ierr);
330 
331   base = owners[rank];
332 
333   /*  wait on receives */
334   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
335   source = lens + nrecvs;
336   count  = nrecvs; slen = 0;
337   while (count) {
338     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
339     /* unpack receives into our local space */
340     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
341     source[imdex]  = recv_status.MPI_SOURCE;
342     lens[imdex]  = n;
343     slen += n;
344     count--;
345   }
346   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
347 
348   /* move the received row indices into the local list */
349   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
350   count = 0;
351   for (i=0; i<nrecvs; i++) {
352     values = rvalues + i*nmax;
353     for (j=0; j<lens[i]; j++) {
354       lrows[count++] = values[j] - base;
355     }
356   }
357   ierr = PetscFree(rvalues);CHKERRQ(ierr);
358   ierr = PetscFree(lens);CHKERRQ(ierr);
359   ierr = PetscFree(owner);CHKERRQ(ierr);
360   ierr = PetscFree(nprocs);CHKERRQ(ierr);
361 
362   /* actually zap the local rows */
363   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
364   PetscLogObjectParent(A,istmp);
365   ierr = PetscFree(lrows);CHKERRQ(ierr);
366   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
367   ierr = ISDestroy(istmp);CHKERRQ(ierr);
368 
369   /* wait on sends */
370   if (nsends) {
371     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
372     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
373     ierr = PetscFree(send_status);CHKERRQ(ierr);
374   }
375   ierr = PetscFree(send_waits);CHKERRQ(ierr);
376   ierr = PetscFree(svalues);CHKERRQ(ierr);
377 
378   PetscFunctionReturn(0);
379 }
380 
381 #undef __FUNCT__
382 #define __FUNCT__ "MatMult_MPIDense"
383 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
384 {
385   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
386   int          ierr;
387 
388   PetscFunctionBegin;
389   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
390   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
391   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
392   PetscFunctionReturn(0);
393 }
394 
395 #undef __FUNCT__
396 #define __FUNCT__ "MatMultAdd_MPIDense"
397 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
398 {
399   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
400   int          ierr;
401 
402   PetscFunctionBegin;
403   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
404   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
405   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
406   PetscFunctionReturn(0);
407 }
408 
409 #undef __FUNCT__
410 #define __FUNCT__ "MatMultTranspose_MPIDense"
411 int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
412 {
413   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
414   int          ierr;
415   PetscScalar  zero = 0.0;
416 
417   PetscFunctionBegin;
418   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
419   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
420   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
421   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
422   PetscFunctionReturn(0);
423 }
424 
425 #undef __FUNCT__
426 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
427 int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
428 {
429   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
430   int          ierr;
431 
432   PetscFunctionBegin;
433   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
434   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
435   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
436   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
437   PetscFunctionReturn(0);
438 }
439 
440 #undef __FUNCT__
441 #define __FUNCT__ "MatGetDiagonal_MPIDense"
442 int MatGetDiagonal_MPIDense(Mat A,Vec v)
443 {
444   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
445   Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
446   int          ierr,len,i,n,m = A->m,radd;
447   PetscScalar  *x,zero = 0.0;
448 
449   PetscFunctionBegin;
450   ierr = VecSet(&zero,v);CHKERRQ(ierr);
451   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
452   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
453   if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
454   len  = PetscMin(a->A->m,a->A->n);
455   radd = a->rstart*m;
456   for (i=0; i<len; i++) {
457     x[i] = aloc->v[radd + i*m + i];
458   }
459   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
460   PetscFunctionReturn(0);
461 }
462 
463 #undef __FUNCT__
464 #define __FUNCT__ "MatDestroy_MPIDense"
465 int MatDestroy_MPIDense(Mat mat)
466 {
467   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
468   int          ierr;
469 
470   PetscFunctionBegin;
471 
472 #if defined(PETSC_USE_LOG)
473   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
474 #endif
475   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
476   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
477   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
478   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
479   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
480   if (mdn->factor) {
481     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
482     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
483     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
484     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
485   }
486   ierr = PetscFree(mdn);CHKERRQ(ierr);
487   PetscFunctionReturn(0);
488 }
489 
490 #undef __FUNCT__
491 #define __FUNCT__ "MatView_MPIDense_Binary"
492 static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
493 {
494   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
495   int          ierr;
496 
497   PetscFunctionBegin;
498   if (mdn->size == 1) {
499     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
500   }
501   else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
502   PetscFunctionReturn(0);
503 }
504 
505 #undef __FUNCT__
506 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
507 static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
508 {
509   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
510   int               ierr,size = mdn->size,rank = mdn->rank;
511   PetscViewerType   vtype;
512   PetscTruth        isascii,isdraw;
513   PetscViewer       sviewer;
514   PetscViewerFormat format;
515 
516   PetscFunctionBegin;
517   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
518   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
519   if (isascii) {
520     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
521     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
522     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
523       MatInfo info;
524       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
525       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
526                    (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
527       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
528       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
529       PetscFunctionReturn(0);
530     } else if (format == PETSC_VIEWER_ASCII_INFO) {
531       PetscFunctionReturn(0);
532     }
533   } else if (isdraw) {
534     PetscDraw       draw;
535     PetscTruth isnull;
536 
537     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
538     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
539     if (isnull) PetscFunctionReturn(0);
540   }
541 
542   if (size == 1) {
543     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
544   } else {
545     /* assemble the entire matrix onto first processor. */
546     Mat          A;
547     int          M = mat->M,N = mat->N,m,row,i,nz,*cols;
548     PetscScalar  *vals;
549 
550     if (!rank) {
551       ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
552     } else {
553       ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
554     }
555     PetscLogObjectParent(mat,A);
556 
557     /* Copy the matrix ... This isn't the most efficient means,
558        but it's quick for now */
559     row = mdn->rstart; m = mdn->A->m;
560     for (i=0; i<m; i++) {
561       ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
562       ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
563       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
564       row++;
565     }
566 
567     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
568     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
569     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
570     if (!rank) {
571       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
572     }
573     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
574     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
575     ierr = MatDestroy(A);CHKERRQ(ierr);
576   }
577   PetscFunctionReturn(0);
578 }
579 
580 #undef __FUNCT__
581 #define __FUNCT__ "MatView_MPIDense"
582 int MatView_MPIDense(Mat mat,PetscViewer viewer)
583 {
584   int        ierr;
585   PetscTruth isascii,isbinary,isdraw,issocket;
586 
587   PetscFunctionBegin;
588 
589   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
590   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
591   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
592   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
593 
594   if (isascii || issocket || isdraw) {
595     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
596   } else if (isbinary) {
597     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
598   } else {
599     SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
600   }
601   PetscFunctionReturn(0);
602 }
603 
604 #undef __FUNCT__
605 #define __FUNCT__ "MatGetInfo_MPIDense"
606 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
607 {
608   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
609   Mat          mdn = mat->A;
610   int          ierr;
611   PetscReal    isend[5],irecv[5];
612 
613   PetscFunctionBegin;
614   info->rows_global    = (double)A->M;
615   info->columns_global = (double)A->N;
616   info->rows_local     = (double)A->m;
617   info->columns_local  = (double)A->N;
618   info->block_size     = 1.0;
619   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
620   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
621   isend[3] = info->memory;  isend[4] = info->mallocs;
622   if (flag == MAT_LOCAL) {
623     info->nz_used      = isend[0];
624     info->nz_allocated = isend[1];
625     info->nz_unneeded  = isend[2];
626     info->memory       = isend[3];
627     info->mallocs      = isend[4];
628   } else if (flag == MAT_GLOBAL_MAX) {
629     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
630     info->nz_used      = irecv[0];
631     info->nz_allocated = irecv[1];
632     info->nz_unneeded  = irecv[2];
633     info->memory       = irecv[3];
634     info->mallocs      = irecv[4];
635   } else if (flag == MAT_GLOBAL_SUM) {
636     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
637     info->nz_used      = irecv[0];
638     info->nz_allocated = irecv[1];
639     info->nz_unneeded  = irecv[2];
640     info->memory       = irecv[3];
641     info->mallocs      = irecv[4];
642   }
643   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
644   info->fill_ratio_needed = 0;
645   info->factor_mallocs    = 0;
646   PetscFunctionReturn(0);
647 }
648 
649 #undef __FUNCT__
650 #define __FUNCT__ "MatSetOption_MPIDense"
651 int MatSetOption_MPIDense(Mat A,MatOption op)
652 {
653   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
654   int          ierr;
655 
656   PetscFunctionBegin;
657   switch (op) {
658   case MAT_NO_NEW_NONZERO_LOCATIONS:
659   case MAT_YES_NEW_NONZERO_LOCATIONS:
660   case MAT_NEW_NONZERO_LOCATION_ERR:
661   case MAT_NEW_NONZERO_ALLOCATION_ERR:
662   case MAT_COLUMNS_SORTED:
663   case MAT_COLUMNS_UNSORTED:
664     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
665     break;
666   case MAT_ROW_ORIENTED:
667     a->roworiented = PETSC_TRUE;
668     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
669     break;
670   case MAT_ROWS_SORTED:
671   case MAT_ROWS_UNSORTED:
672   case MAT_YES_NEW_DIAGONALS:
673   case MAT_USE_HASH_TABLE:
674     PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
675     break;
676   case MAT_COLUMN_ORIENTED:
677     a->roworiented = PETSC_FALSE;
678     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
679     break;
680   case MAT_IGNORE_OFF_PROC_ENTRIES:
681     a->donotstash = PETSC_TRUE;
682     break;
683   case MAT_NO_NEW_DIAGONALS:
684     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
685   case MAT_SYMMETRIC:
686   case MAT_STRUCTURALLY_SYMMETRIC:
687   case MAT_NOT_SYMMETRIC:
688   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
689   case MAT_HERMITIAN:
690   case MAT_NOT_HERMITIAN:
691   case MAT_SYMMETRY_ETERNAL:
692   case MAT_NOT_SYMMETRY_ETERNAL:
693     break;
694   default:
695     SETERRQ(PETSC_ERR_SUP,"unknown option");
696   }
697   PetscFunctionReturn(0);
698 }
699 
700 #undef __FUNCT__
701 #define __FUNCT__ "MatGetRow_MPIDense"
702 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v)
703 {
704   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
705   int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;
706 
707   PetscFunctionBegin;
708   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
709   lrow = row - rstart;
710   ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
711   PetscFunctionReturn(0);
712 }
713 
714 #undef __FUNCT__
715 #define __FUNCT__ "MatRestoreRow_MPIDense"
716 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
717 {
718   int ierr;
719 
720   PetscFunctionBegin;
721   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
722   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
723   PetscFunctionReturn(0);
724 }
725 
726 #undef __FUNCT__
727 #define __FUNCT__ "MatDiagonalScale_MPIDense"
728 int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
729 {
730   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
731   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
732   PetscScalar  *l,*r,x,*v;
733   int          ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;
734 
735   PetscFunctionBegin;
736   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
737   if (ll) {
738     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
739     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
740     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
741     for (i=0; i<m; i++) {
742       x = l[i];
743       v = mat->v + i;
744       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
745     }
746     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
747     PetscLogFlops(n*m);
748   }
749   if (rr) {
750     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
751     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
752     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
753     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
754     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
755     for (i=0; i<n; i++) {
756       x = r[i];
757       v = mat->v + i*m;
758       for (j=0; j<m; j++) { (*v++) *= x;}
759     }
760     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
761     PetscLogFlops(n*m);
762   }
763   PetscFunctionReturn(0);
764 }
765 
766 #undef __FUNCT__
767 #define __FUNCT__ "MatNorm_MPIDense"
768 int MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
769 {
770   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
771   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
772   int          ierr,i,j;
773   PetscReal    sum = 0.0;
774   PetscScalar  *v = mat->v;
775 
776   PetscFunctionBegin;
777   if (mdn->size == 1) {
778     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
779   } else {
780     if (type == NORM_FROBENIUS) {
781       for (i=0; i<mdn->A->n*mdn->A->m; i++) {
782 #if defined(PETSC_USE_COMPLEX)
783         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
784 #else
785         sum += (*v)*(*v); v++;
786 #endif
787       }
788       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
789       *nrm = sqrt(*nrm);
790       PetscLogFlops(2*mdn->A->n*mdn->A->m);
791     } else if (type == NORM_1) {
792       PetscReal *tmp,*tmp2;
793       ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
794       tmp2 = tmp + A->N;
795       ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
796       *nrm = 0.0;
797       v = mat->v;
798       for (j=0; j<mdn->A->n; j++) {
799         for (i=0; i<mdn->A->m; i++) {
800           tmp[j] += PetscAbsScalar(*v);  v++;
801         }
802       }
803       ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
804       for (j=0; j<A->N; j++) {
805         if (tmp2[j] > *nrm) *nrm = tmp2[j];
806       }
807       ierr = PetscFree(tmp);CHKERRQ(ierr);
808       PetscLogFlops(A->n*A->m);
809     } else if (type == NORM_INFINITY) { /* max row norm */
810       PetscReal ntemp;
811       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
812       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
813     } else {
814       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
815     }
816   }
817   PetscFunctionReturn(0);
818 }
819 
820 #undef __FUNCT__
821 #define __FUNCT__ "MatTranspose_MPIDense"
822 int MatTranspose_MPIDense(Mat A,Mat *matout)
823 {
824   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
825   Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
826   Mat          B;
827   int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
828   int          j,i,ierr;
829   PetscScalar  *v;
830 
831   PetscFunctionBegin;
832   if (!matout && M != N) {
833     SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
834   }
835   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);
836 
837   m = a->A->m; n = a->A->n; v = Aloc->v;
838   ierr = PetscMalloc(m*sizeof(int),&rwork);CHKERRQ(ierr);
839   for (j=0; j<n; j++) {
840     for (i=0; i<m; i++) rwork[i] = rstart + i;
841     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
842     v   += m;
843   }
844   ierr = PetscFree(rwork);CHKERRQ(ierr);
845   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
846   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
847   if (matout) {
848     *matout = B;
849   } else {
850     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
851   }
852   PetscFunctionReturn(0);
853 }
854 
855 #include "petscblaslapack.h"
856 #undef __FUNCT__
857 #define __FUNCT__ "MatScale_MPIDense"
858 int MatScale_MPIDense(const PetscScalar *alpha,Mat inA)
859 {
860   Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
861   Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
862   int          one = 1,nz;
863 
864   PetscFunctionBegin;
865   nz = inA->m*inA->N;
866   BLscal_(&nz,(PetscScalar*)alpha,a->v,&one);
867   PetscLogFlops(nz);
868   PetscFunctionReturn(0);
869 }
870 
871 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
875 int MatSetUpPreallocation_MPIDense(Mat A)
876 {
877   int        ierr;
878 
879   PetscFunctionBegin;
880   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
881   PetscFunctionReturn(0);
882 }
883 
884 /* -------------------------------------------------------------------*/
885 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
886        MatGetRow_MPIDense,
887        MatRestoreRow_MPIDense,
888        MatMult_MPIDense,
889 /* 4*/ MatMultAdd_MPIDense,
890        MatMultTranspose_MPIDense,
891        MatMultTransposeAdd_MPIDense,
892        0,
893        0,
894        0,
895 /*10*/ 0,
896        0,
897        0,
898        0,
899        MatTranspose_MPIDense,
900 /*15*/ MatGetInfo_MPIDense,
901        0,
902        MatGetDiagonal_MPIDense,
903        MatDiagonalScale_MPIDense,
904        MatNorm_MPIDense,
905 /*20*/ MatAssemblyBegin_MPIDense,
906        MatAssemblyEnd_MPIDense,
907        0,
908        MatSetOption_MPIDense,
909        MatZeroEntries_MPIDense,
910 /*25*/ MatZeroRows_MPIDense,
911        0,
912        0,
913        0,
914        0,
915 /*30*/ MatSetUpPreallocation_MPIDense,
916        0,
917        0,
918        MatGetArray_MPIDense,
919        MatRestoreArray_MPIDense,
920 /*35*/ MatDuplicate_MPIDense,
921        0,
922        0,
923        0,
924        0,
925 /*40*/ 0,
926        MatGetSubMatrices_MPIDense,
927        0,
928        MatGetValues_MPIDense,
929        0,
930 /*45*/ 0,
931        MatScale_MPIDense,
932        0,
933        0,
934        0,
935 /*50*/ MatGetBlockSize_MPIDense,
936        0,
937        0,
938        0,
939        0,
940 /*55*/ 0,
941        0,
942        0,
943        0,
944        0,
945 /*60*/ MatGetSubMatrix_MPIDense,
946        MatDestroy_MPIDense,
947        MatView_MPIDense,
948        MatGetPetscMaps_Petsc,
949        0,
950 /*65*/ 0,
951        0,
952        0,
953        0,
954        0,
955 /*70*/ 0,
956        0,
957        0,
958        0,
959        0,
960 /*75*/ 0,
961        0,
962        0,
963        0,
964        0,
965 /*80*/ 0,
966        0,
967        0,
968        0,
969 /*85*/ MatLoad_MPIDense};
970 
971 EXTERN_C_BEGIN
972 #undef __FUNCT__
973 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
974 int MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
975 {
976   Mat_MPIDense *a;
977   int          ierr;
978 
979   PetscFunctionBegin;
980   mat->preallocated = PETSC_TRUE;
981   /* Note:  For now, when data is specified above, this assumes the user correctly
982    allocates the local dense storage space.  We should add error checking. */
983 
984   a    = (Mat_MPIDense*)mat->data;
985   ierr = MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);CHKERRQ(ierr);
986   PetscLogObjectParent(mat,a->A);
987   PetscFunctionReturn(0);
988 }
989 EXTERN_C_END
990 
991 /*MC
992    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
993 
994    Options Database Keys:
995 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
996 
997   Level: beginner
998 
999 .seealso: MatCreateMPIDense
1000 M*/
1001 
1002 EXTERN_C_BEGIN
1003 #undef __FUNCT__
1004 #define __FUNCT__ "MatCreate_MPIDense"
1005 int MatCreate_MPIDense(Mat mat)
1006 {
1007   Mat_MPIDense *a;
1008   int          ierr,i;
1009 
1010   PetscFunctionBegin;
1011   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1012   mat->data         = (void*)a;
1013   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1014   mat->factor       = 0;
1015   mat->mapping      = 0;
1016 
1017   a->factor       = 0;
1018   mat->insertmode = NOT_SET_VALUES;
1019   ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
1020   ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);
1021 
1022   ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
1023   ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
1024   a->nvec = mat->n;
1025 
1026   /* the information in the maps duplicates the information computed below, eventually
1027      we should remove the duplicated computation below and rely on the maps */
1028   ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
1029   ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);
1030 
1031   /* build local table of row and column ownerships */
1032   ierr       = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
1033   a->cowners = a->rowners + a->size + 1;
1034   PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1035   ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
1036   a->rowners[0] = 0;
1037   for (i=2; i<=a->size; i++) {
1038     a->rowners[i] += a->rowners[i-1];
1039   }
1040   a->rstart = a->rowners[a->rank];
1041   a->rend   = a->rowners[a->rank+1];
1042   ierr      = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
1043   a->cowners[0] = 0;
1044   for (i=2; i<=a->size; i++) {
1045     a->cowners[i] += a->cowners[i-1];
1046   }
1047 
1048   /* build cache for off array entries formed */
1049   a->donotstash = PETSC_FALSE;
1050   ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);
1051 
1052   /* stuff used for matrix vector multiply */
1053   a->lvec        = 0;
1054   a->Mvctx       = 0;
1055   a->roworiented = PETSC_TRUE;
1056 
1057   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1058                                      "MatGetDiagonalBlock_MPIDense",
1059                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1060   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1061                                      "MatMPIDenseSetPreallocation_MPIDense",
1062                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1063   PetscFunctionReturn(0);
1064 }
1065 EXTERN_C_END
1066 
1067 /*MC
1068    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1069 
1070    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1071    and MATMPIDENSE otherwise.
1072 
1073    Options Database Keys:
1074 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
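
   A small sketch of selecting the type at runtime (M and N are assumed global dimensions;
   with -mat_type dense this yields MATSEQDENSE on one process and MATMPIDENSE otherwise):
.vb
      ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);
.ve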
1075 
1076   Level: beginner
1077 
1078 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1079 M*/
1080 
1081 EXTERN_C_BEGIN
1082 #undef __FUNCT__
1083 #define __FUNCT__ "MatCreate_Dense"
1084 int MatCreate_Dense(Mat A) {
1085   int ierr,size;
1086 
1087   PetscFunctionBegin;
1088   ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr);
1089   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1090   if (size == 1) {
1091     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1092   } else {
1093     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1094   }
1095   PetscFunctionReturn(0);
1096 }
1097 EXTERN_C_END
1098 
1099 #undef __FUNCT__
1100 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1101 /*@C
1102    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1103 
1104    Not collective
1105 
1106    Input Parameters:
1107 +  A - the matrix
1108 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1109    to control all matrix memory allocation.
1110 
1111    Notes:
1112    The dense format is fully compatible with standard Fortran 77
1113    storage by columns.
1114 
1115    The data input variable is intended primarily for Fortran programmers
1116    who wish to allocate their own matrix memory space.  Most users should
1117    set data=PETSC_NULL.
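
   A minimal sketch of supplying user storage (m and N below are hypothetical local row
   and global column counts; the array must be m by N, stored by columns with leading
   dimension m, so entry (i,j) is v[i + j*m]):
.vb
      Mat         A;
      PetscScalar *v;
      int         ierr,m = 5,N = 20;
      ierr = PetscMalloc(m*N*sizeof(PetscScalar),&v);CHKERRQ(ierr);
      ierr = MatCreate(PETSC_COMM_WORLD,m,PETSC_DECIDE,PETSC_DECIDE,N,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,v);CHKERRQ(ierr);
.ve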
1118 
1119    Level: intermediate
1120 
1121 .keywords: matrix,dense, parallel
1122 
1123 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1124 @*/
1125 int MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1126 {
1127   int ierr,(*f)(Mat,PetscScalar *);
1128 
1129   PetscFunctionBegin;
1130   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1131   if (f) {
1132     ierr = (*f)(mat,data);CHKERRQ(ierr);
1133   }
1134   PetscFunctionReturn(0);
1135 }
1136 
1137 #undef __FUNCT__
1138 #define __FUNCT__ "MatCreateMPIDense"
1139 /*@C
1140    MatCreateMPIDense - Creates a parallel matrix in dense format.
1141 
1142    Collective on MPI_Comm
1143 
1144    Input Parameters:
1145 +  comm - MPI communicator
1146 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1147 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1148 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1149 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1150 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1151    to control all matrix memory allocation.
1152 
1153    Output Parameter:
1154 .  A - the matrix
1155 
1156    Notes:
1157    The dense format is fully compatible with standard Fortran 77
1158    storage by columns.
1159 
1160    The data input variable is intended primarily for Fortran programmers
1161    who wish to allocate their own matrix memory space.  Most users should
1162    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1163 
1164    The user MUST specify either the local or global matrix dimensions
1165    (possibly both).
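
   A minimal usage sketch (a hypothetical 100 by 100 matrix with PETSc-managed storage;
   each process sets only its locally owned diagonal entries):
.vb
      Mat         A;
      int         ierr,i,rstart,rend;
      PetscScalar one = 1.0;
      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,PETSC_NULL,&A);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (i=rstart; i<rend; i++) {
        ierr = MatSetValues(A,1,&i,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve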
1166 
1167    Level: intermediate
1168 
1169 .keywords: matrix,dense, parallel
1170 
1171 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1172 @*/
1173 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A)
1174 {
1175   int ierr,size;
1176 
1177   PetscFunctionBegin;
1178   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1179   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1180   if (size > 1) {
1181     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1182     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1183   } else {
1184     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1185     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1186   }
1187   PetscFunctionReturn(0);
1188 }
1189 
1190 #undef __FUNCT__
1191 #define __FUNCT__ "MatDuplicate_MPIDense"
1192 static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1193 {
1194   Mat          mat;
1195   Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1196   int          ierr;
1197 
1198   PetscFunctionBegin;
1199   *newmat       = 0;
1200   ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
1201   ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr);
1202   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1203   mat->data         = (void*)a;
1204   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1205   mat->factor       = A->factor;
1206   mat->assembled    = PETSC_TRUE;
1207   mat->preallocated = PETSC_TRUE;
1208 
1209   a->rstart       = oldmat->rstart;
1210   a->rend         = oldmat->rend;
1211   a->size         = oldmat->size;
1212   a->rank         = oldmat->rank;
1213   mat->insertmode = NOT_SET_VALUES;
1214   a->nvec         = oldmat->nvec;
1215   a->donotstash   = oldmat->donotstash;
1216   ierr            = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);CHKERRQ(ierr);
1217   PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1218   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
1219   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1220 
1221   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1222   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1223   PetscLogObjectParent(mat,a->A);
1224   *newmat = mat;
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 #include "petscsys.h"
1229 
1230 #undef __FUNCT__
1231 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1232 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
1233 {
1234   int          *rowners,i,size,rank,m,ierr,nz,j;
1235   PetscScalar  *array,*vals,*vals_ptr;
1236   MPI_Status   status;
1237 
1238   PetscFunctionBegin;
1239   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1240   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1241 
1242   /* determine ownership of all rows */
1243   m          = M/size + ((M % size) > rank);
1244   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1245   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1246   rowners[0] = 0;
1247   for (i=2; i<=size; i++) {
1248     rowners[i] += rowners[i-1];
1249   }
1250 
1251   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1252   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1253 
1254   if (!rank) {
1255     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1256 
1257     /* read in my part of the matrix numerical values  */
1258     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1259 
1260     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1261     vals_ptr = vals;
1262     for (i=0; i<m; i++) {
1263       for (j=0; j<N; j++) {
1264         array[i + j*m] = *vals_ptr++;
1265       }
1266     }
1267 
1268     /* read in the other processors' parts and ship them out */
1269     for (i=1; i<size; i++) {
1270       nz   = (rowners[i+1] - rowners[i])*N;
1271       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1272       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1273     }
1274   } else {
1275     /* receive numeric values */
1276     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1277 
1278     /* receive message of values*/
1279     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1280 
1281     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1282     vals_ptr = vals;
1283     for (i=0; i<m; i++) {
1284       for (j=0; j<N; j++) {
1285         array[i + j*m] = *vals_ptr++;
1286       }
1287     }
1288   }
1289   ierr = PetscFree(rowners);CHKERRQ(ierr);
1290   ierr = PetscFree(vals);CHKERRQ(ierr);
1291   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1292   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1293   PetscFunctionReturn(0);
1294 }
1295 
1296 #undef __FUNCT__
1297 #define __FUNCT__ "MatLoad_MPIDense"
1298 int MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
1299 {
1300   Mat          A;
1301   PetscScalar  *vals,*svals;
1302   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1303   MPI_Status   status;
1304   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1305   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1306   int          tag = ((PetscObject)viewer)->tag;
1307   int          i,nz,ierr,j,rstart,rend,fd;
1308 
1309   PetscFunctionBegin;
1310   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1311   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1312   if (!rank) {
1313     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1314     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1315     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1316   }
1317 
1318   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1319   M = header[1]; N = header[2]; nz = header[3];
1320 
1321   /*
1322        Handle case where matrix is stored on disk as a dense matrix
1323   */
1324   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1325     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1326     PetscFunctionReturn(0);
1327   }
1328 
1329   /* determine ownership of all rows */
1330   m          = M/size + ((M % size) > rank);
1331   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1332   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1333   rowners[0] = 0;
1334   for (i=2; i<=size; i++) {
1335     rowners[i] += rowners[i-1];
1336   }
1337   rstart = rowners[rank];
1338   rend   = rowners[rank+1];
1339 
1340   /* distribute row lengths to all processors */
1341   ierr    = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);CHKERRQ(ierr);
1342   offlens = ourlens + (rend-rstart);
1343   if (!rank) {
1344     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1345     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1346     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1347     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1348     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1349     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1350   } else {
1351     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1352   }
1353 
1354   if (!rank) {
1355     /* calculate the number of nonzeros on each processor */
1356     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1357     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1358     for (i=0; i<size; i++) {
1359       for (j=rowners[i]; j< rowners[i+1]; j++) {
1360         procsnz[i] += rowlengths[j];
1361       }
1362     }
1363     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1364 
1365     /* determine max buffer needed and allocate it */
1366     maxnz = 0;
1367     for (i=0; i<size; i++) {
1368       maxnz = PetscMax(maxnz,procsnz[i]);
1369     }
1370     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1371 
1372     /* read in my part of the matrix column indices  */
1373     nz = procsnz[0];
1374     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1375     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1376 
1377     /* read in everyone else's and ship off */
1378     for (i=1; i<size; i++) {
1379       nz   = procsnz[i];
1380       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1381       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1382     }
1383     ierr = PetscFree(cols);CHKERRQ(ierr);
1384   } else {
1385     /* determine buffer space needed for message */
1386     nz = 0;
1387     for (i=0; i<m; i++) {
1388       nz += ourlens[i];
1389     }
1390     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1391 
1392     /* receive message of column indices*/
1393     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1394     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1395     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1396   }
1397 
1398   /* loop over local rows, determining number of off diagonal entries */
1399   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1400   jj = 0;
1401   for (i=0; i<m; i++) {
1402     for (j=0; j<ourlens[i]; j++) {
1403       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1404       jj++;
1405     }
1406   }
1407 
1408   /* create our matrix */
1409   for (i=0; i<m; i++) {
1410     ourlens[i] -= offlens[i];
1411   }
1412   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1413   A = *newmat;
1414   for (i=0; i<m; i++) {
1415     ourlens[i] += offlens[i];
1416   }
1417 
1418   if (!rank) {
1419     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1420 
1421     /* read in my part of the matrix numerical values  */
1422     nz = procsnz[0];
1423     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1424 
1425     /* insert into matrix */
1426     jj      = rstart;
1427     smycols = mycols;
1428     svals   = vals;
1429     for (i=0; i<m; i++) {
1430       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1431       smycols += ourlens[i];
1432       svals   += ourlens[i];
1433       jj++;
1434     }
1435 
1436     /* read in the other processors' parts and ship them out */
1437     for (i=1; i<size; i++) {
1438       nz   = procsnz[i];
1439       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1440       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1441     }
1442     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1443   } else {
1444     /* receive numeric values */
1445     ierr = PetscMalloc(nz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1446 
1447     /* receive message of values*/
1448     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1449     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1450     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1451 
1452     /* insert into matrix */
1453     jj      = rstart;
1454     smycols = mycols;
1455     svals   = vals;
1456     for (i=0; i<m; i++) {
1457       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1458       smycols += ourlens[i];
1459       svals   += ourlens[i];
1460       jj++;
1461     }
1462   }
1463   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1464   ierr = PetscFree(vals);CHKERRQ(ierr);
1465   ierr = PetscFree(mycols);CHKERRQ(ierr);
1466   ierr = PetscFree(rowners);CHKERRQ(ierr);
1467 
1468   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1469   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1470   PetscFunctionReturn(0);
1471 }
1472 
1473 
1474 
1475 
1476 
1477