1 /*$Id: mpidense.c,v 1.159 2001/08/10 03:30:41 bsmith Exp $*/
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 #include "src/mat/impls/dense/mpi/mpidense.h"
8 #include "src/vec/vecimpl.h"
9 
10 EXTERN_C_BEGIN
11 #undef __FUNCT__
12 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
13 int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
14 {
15   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
16   int          m = A->m,rstart = mdn->rstart,ierr;
17   PetscScalar  *array;
18   MPI_Comm     comm;
19 
20   PetscFunctionBegin;
21   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
22 
23   /* The reuse aspect is not implemented efficiently */
24   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
25 
26   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
27   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
28   ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
29   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
30   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
31   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
32 
33   *iscopy = PETSC_TRUE;
34   PetscFunctionReturn(0);
35 }
36 EXTERN_C_END
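/*
   A hedged sketch (not part of the original source): MatGetDiagonalBlock_MPIDense() is not
   called directly; MatCreate_MPIDense() below composes it on the matrix under the name
   "MatGetDiagonalBlock_C", so callers typically look it up with PetscObjectQueryFunction(),
   roughly as follows (the exact cast depends on the PETSc version in use).

     int        (*f)(Mat,PetscTruth*,MatReuse,Mat*);
     PetscTruth iscopy;
     Mat        B;
     int        ierr;

     ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr);
     if (f) {
       ierr = (*f)(A,&iscopy,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
     }
*/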
37 
38 EXTERN int MatSetUpMultiply_MPIDense(Mat);
39 
40 #undef __FUNCT__
41 #define __FUNCT__ "MatSetValues_MPIDense"
42 int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v,InsertMode addv)
43 {
44   Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
45   int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
46   PetscTruth   roworiented = A->roworiented;
47 
48   PetscFunctionBegin;
49   for (i=0; i<m; i++) {
50     if (idxm[i] < 0) continue;
51     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
52     if (idxm[i] >= rstart && idxm[i] < rend) {
53       row = idxm[i] - rstart;
54       if (roworiented) {
55         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
56       } else {
57         for (j=0; j<n; j++) {
58           if (idxn[j] < 0) continue;
59           if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
60           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
61         }
62       }
63     } else {
64       if (!A->donotstash) {
65         if (roworiented) {
66           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
67         } else {
68           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
69         }
70       }
71     }
72   }
73   PetscFunctionReturn(0);
74 }
75 
76 #undef __FUNCT__
77 #define __FUNCT__ "MatGetValues_MPIDense"
78 int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
79 {
80   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
81   int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;
82 
83   PetscFunctionBegin;
84   for (i=0; i<m; i++) {
85     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
86     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
87     if (idxm[i] >= rstart && idxm[i] < rend) {
88       row = idxm[i] - rstart;
89       for (j=0; j<n; j++) {
90         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
91         if (idxn[j] >= mat->N) {
92           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
93         }
94         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
95       }
96     } else {
97       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
98     }
99   }
100   PetscFunctionReturn(0);
101 }
102 
103 #undef __FUNCT__
104 #define __FUNCT__ "MatGetArray_MPIDense"
105 int MatGetArray_MPIDense(Mat A,PetscScalar **array)
106 {
107   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
108   int          ierr;
109 
110   PetscFunctionBegin;
111   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
112   PetscFunctionReturn(0);
113 }
114 
115 #undef __FUNCT__
116 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
117 static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
118 {
119   Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
120   Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
121   int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
122   PetscScalar  *av,*bv,*v = lmat->v;
123   Mat          newmat;
124 
125   PetscFunctionBegin;
126   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
127   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
128   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
129   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
130 
131   /* No parallel redistribution is currently supported! Should really check each index set
132      to confirm that it is OK. Currently supports only submatrices with the same row
133      partitioning as the original matrix! */
134 
135   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
136   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
137 
138   /* Check submatrix call */
139   if (scall == MAT_REUSE_MATRIX) {
140     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
141     /* Really need to test row and column sizes! */
142     newmat = *B;
143   } else {
144     /* Create and fill new matrix */
145     ierr = MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);CHKERRQ(ierr);
146   }
147 
148   /* Now extract the data pointers and do the copy, column at a time */
149   newmatd = (Mat_MPIDense*)newmat->data;
150   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
151 
152   for (i=0; i<ncols; i++) {
153     av = v + nlrows*icol[i];
154     for (j=0; j<nrows; j++) {
155       *bv++ = av[irow[j] - rstart];
156     }
157   }
158 
159   /* Assemble the matrices so that the correct flags are set */
160   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
161   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
162 
163   /* Free work space */
164   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
165   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
166   *B = newmat;
167   PetscFunctionReturn(0);
168 }
169 
170 #undef __FUNCT__
171 #define __FUNCT__ "MatRestoreArray_MPIDense"
172 int MatRestoreArray_MPIDense(Mat A,PetscScalar **array)
173 {
174   PetscFunctionBegin;
175   PetscFunctionReturn(0);
176 }
177 
178 #undef __FUNCT__
179 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
180 int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
181 {
182   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
183   MPI_Comm     comm = mat->comm;
184   int          ierr,nstash,reallocs;
185   InsertMode   addv;
186 
187   PetscFunctionBegin;
188   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
189   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
190   if (addv == (ADD_VALUES|INSERT_VALUES)) {
191     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
192   }
193   mat->insertmode = addv; /* in case this processor had no cache */
194 
195   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
196   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
197   PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
198   PetscFunctionReturn(0);
199 }
200 
201 #undef __FUNCT__
202 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
203 int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
204 {
205   Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
206   int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
207   PetscScalar  *val;
208   InsertMode   addv=mat->insertmode;
209 
210   PetscFunctionBegin;
211   /*  wait on receives */
212   while (1) {
213     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
214     if (!flg) break;
215 
216     for (i=0; i<n;) {
217       /* Now identify the consecutive vals belonging to the same row */
218       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
219       if (j < n) ncols = j-i;
220       else       ncols = n-i;
221       /* Now assemble all these values with a single function call */
222       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
223       i = j;
224     }
225   }
226   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
227 
228   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
229   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
230 
231   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
232     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
233   }
234   PetscFunctionReturn(0);
235 }
236 
237 #undef __FUNCT__
238 #define __FUNCT__ "MatZeroEntries_MPIDense"
239 int MatZeroEntries_MPIDense(Mat A)
240 {
241   int          ierr;
242   Mat_MPIDense *l = (Mat_MPIDense*)A->data;
243 
244   PetscFunctionBegin;
245   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
246   PetscFunctionReturn(0);
247 }
248 
249 #undef __FUNCT__
250 #define __FUNCT__ "MatGetBlockSize_MPIDense"
251 int MatGetBlockSize_MPIDense(Mat A,int *bs)
252 {
253   PetscFunctionBegin;
254   *bs = 1;
255   PetscFunctionReturn(0);
256 }
257 
258 /* the code does not do the diagonal entries correctly unless the
259    matrix is square and the column and row ownerships are identical.
260    This is a BUG. The only way to fix it seems to be to access
261    the local matrix mdn->A directly and not through the MatZeroRows()
262    routine.
263 */
264 #undef __FUNCT__
265 #define __FUNCT__ "MatZeroRows_MPIDense"
266 int MatZeroRows_MPIDense(Mat A,IS is,PetscScalar *diag)
267 {
268   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
269   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
270   int            *nprocs,j,idx,nsends;
271   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
272   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
273   int            *lens,imdex,*lrows,*values;
274   MPI_Comm       comm = A->comm;
275   MPI_Request    *send_waits,*recv_waits;
276   MPI_Status     recv_status,*send_status;
277   IS             istmp;
278   PetscTruth     found;
279 
280   PetscFunctionBegin;
281   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
282   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
283 
284   /*  first count number of contributors to each processor */
285   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
286   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
287   ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
288   for (i=0; i<N; i++) {
289     idx = rows[i];
290     found = PETSC_FALSE;
291     for (j=0; j<size; j++) {
292       if (idx >= owners[j] && idx < owners[j+1]) {
293         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
294       }
295     }
296     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
297   }
298   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
299 
300   /* inform other processors of number of messages and max length*/
301   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
302 
303   /* post receives:   */
304   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
305   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
306   for (i=0; i<nrecvs; i++) {
307     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
308   }
309 
310   /* do sends:
311       1) starts[i] gives the starting index in svalues for stuff going to
312          the ith processor
313   */
314   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
315   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
316   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
317   starts[0]  = 0;
318   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
319   for (i=0; i<N; i++) {
320     svalues[starts[owner[i]]++] = rows[i];
321   }
322   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
323 
324   starts[0] = 0;
325   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
326   count = 0;
327   for (i=0; i<size; i++) {
328     if (nprocs[2*i+1]) {
329       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
330     }
331   }
332   ierr = PetscFree(starts);CHKERRQ(ierr);
333 
334   base = owners[rank];
335 
336   /*  wait on receives */
337   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
338   source = lens + nrecvs;
339   count  = nrecvs; slen = 0;
340   while (count) {
341     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
342     /* unpack receives into our local space */
343     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
344     source[imdex]  = recv_status.MPI_SOURCE;
345     lens[imdex]  = n;
346     slen += n;
347     count--;
348   }
349   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
350 
351   /* convert the received global row indices to local row numbers */
352   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
353   count = 0;
354   for (i=0; i<nrecvs; i++) {
355     values = rvalues + i*nmax;
356     for (j=0; j<lens[i]; j++) {
357       lrows[count++] = values[j] - base;
358     }
359   }
360   ierr = PetscFree(rvalues);CHKERRQ(ierr);
361   ierr = PetscFree(lens);CHKERRQ(ierr);
362   ierr = PetscFree(owner);CHKERRQ(ierr);
363   ierr = PetscFree(nprocs);CHKERRQ(ierr);
364 
365   /* actually zap the local rows */
366   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
367   PetscLogObjectParent(A,istmp);
368   ierr = PetscFree(lrows);CHKERRQ(ierr);
369   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
370   ierr = ISDestroy(istmp);CHKERRQ(ierr);
371 
372   /* wait on sends */
373   if (nsends) {
374     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
375     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
376     ierr = PetscFree(send_status);CHKERRQ(ierr);
377   }
378   ierr = PetscFree(send_waits);CHKERRQ(ierr);
379   ierr = PetscFree(svalues);CHKERRQ(ierr);
380 
381   PetscFunctionReturn(0);
382 }
383 
384 #undef __FUNCT__
385 #define __FUNCT__ "MatMult_MPIDense"
386 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
387 {
388   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
389   int          ierr;
390 
391   PetscFunctionBegin;
392   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
393   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
394   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
395   PetscFunctionReturn(0);
396 }
397 
398 #undef __FUNCT__
399 #define __FUNCT__ "MatMultAdd_MPIDense"
400 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
401 {
402   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
403   int          ierr;
404 
405   PetscFunctionBegin;
406   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
407   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
408   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
409   PetscFunctionReturn(0);
410 }
411 
412 #undef __FUNCT__
413 #define __FUNCT__ "MatMultTranspose_MPIDense"
414 int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
415 {
416   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
417   int          ierr;
418   PetscScalar  zero = 0.0;
419 
420   PetscFunctionBegin;
421   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
422   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
423   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
424   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
425   PetscFunctionReturn(0);
426 }
427 
428 #undef __FUNCT__
429 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
430 int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
431 {
432   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
433   int          ierr;
434 
435   PetscFunctionBegin;
436   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
437   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
438   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
439   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
440   PetscFunctionReturn(0);
441 }
442 
443 #undef __FUNCT__
444 #define __FUNCT__ "MatGetDiagonal_MPIDense"
445 int MatGetDiagonal_MPIDense(Mat A,Vec v)
446 {
447   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
448   Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
449   int          ierr,len,i,n,m = A->m,radd;
450   PetscScalar  *x,zero = 0.0;
451 
452   PetscFunctionBegin;
453   ierr = VecSet(&zero,v);CHKERRQ(ierr);
454   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
455   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
456   if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
457   len  = PetscMin(a->A->m,a->A->n);
458   radd = a->rstart*m;
459   for (i=0; i<len; i++) {
460     x[i] = aloc->v[radd + i*m + i];
461   }
462   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
463   PetscFunctionReturn(0);
464 }
465 
466 #undef __FUNCT__
467 #define __FUNCT__ "MatDestroy_MPIDense"
468 int MatDestroy_MPIDense(Mat mat)
469 {
470   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
471   int          ierr;
472 
473   PetscFunctionBegin;
474 
475 #if defined(PETSC_USE_LOG)
476   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
477 #endif
478   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
479   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
480   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
481   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
482   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
483   if (mdn->factor) {
484     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
485     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
486     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
487     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
488   }
489   ierr = PetscFree(mdn);CHKERRQ(ierr);
490   PetscFunctionReturn(0);
491 }
492 
493 #undef __FUNCT__
494 #define __FUNCT__ "MatView_MPIDense_Binary"
495 static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
496 {
497   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
498   int          ierr;
499 
500   PetscFunctionBegin;
501   if (mdn->size == 1) {
502     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
503   }
504   else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
505   PetscFunctionReturn(0);
506 }
507 
508 #undef __FUNCT__
509 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
510 static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
511 {
512   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
513   int               ierr,size = mdn->size,rank = mdn->rank;
514   PetscViewerType   vtype;
515   PetscTruth        isascii,isdraw;
516   PetscViewer       sviewer;
517   PetscViewerFormat format;
518 
519   PetscFunctionBegin;
520   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
521   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
522   if (isascii) {
523     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
524     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
525     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
526       MatInfo info;
527       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
528       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
529                    (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
530       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
531       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
532       PetscFunctionReturn(0);
533     } else if (format == PETSC_VIEWER_ASCII_INFO) {
534       PetscFunctionReturn(0);
535     }
536   } else if (isdraw) {
537     PetscDraw       draw;
538     PetscTruth isnull;
539 
540     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
541     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
542     if (isnull) PetscFunctionReturn(0);
543   }
544 
545   if (size == 1) {
546     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
547   } else {
548     /* assemble the entire matrix onto first processor. */
549     Mat          A;
550     int          M = mat->M,N = mat->N,m,row,i,nz,*cols;
551     PetscScalar  *vals;
552 
553     if (!rank) {
554       ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
555     } else {
556       ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
557     }
558     PetscLogObjectParent(mat,A);
559 
560     /* Copy the matrix ... This isn't the most efficient means,
561        but it's quick for now */
562     row = mdn->rstart; m = mdn->A->m;
563     for (i=0; i<m; i++) {
564       ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
565       ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
566       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
567       row++;
568     }
569 
570     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
571     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
572     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
573     if (!rank) {
574       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
575     }
576     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
577     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
578     ierr = MatDestroy(A);CHKERRQ(ierr);
579   }
580   PetscFunctionReturn(0);
581 }
582 
583 #undef __FUNCT__
584 #define __FUNCT__ "MatView_MPIDense"
585 int MatView_MPIDense(Mat mat,PetscViewer viewer)
586 {
587   int        ierr;
588   PetscTruth isascii,isbinary,isdraw,issocket;
589 
590   PetscFunctionBegin;
591 
592   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
593   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
594   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
595   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
596 
597   if (isascii || issocket || isdraw) {
598     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
599   } else if (isbinary) {
600     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
601   } else {
602     SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
603   }
604   PetscFunctionReturn(0);
605 }
606 
607 #undef __FUNCT__
608 #define __FUNCT__ "MatGetInfo_MPIDense"
609 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
610 {
611   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
612   Mat          mdn = mat->A;
613   int          ierr;
614   PetscReal    isend[5],irecv[5];
615 
616   PetscFunctionBegin;
617   info->rows_global    = (double)A->M;
618   info->columns_global = (double)A->N;
619   info->rows_local     = (double)A->m;
620   info->columns_local  = (double)A->N;
621   info->block_size     = 1.0;
622   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
623   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
624   isend[3] = info->memory;  isend[4] = info->mallocs;
625   if (flag == MAT_LOCAL) {
626     info->nz_used      = isend[0];
627     info->nz_allocated = isend[1];
628     info->nz_unneeded  = isend[2];
629     info->memory       = isend[3];
630     info->mallocs      = isend[4];
631   } else if (flag == MAT_GLOBAL_MAX) {
632     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
633     info->nz_used      = irecv[0];
634     info->nz_allocated = irecv[1];
635     info->nz_unneeded  = irecv[2];
636     info->memory       = irecv[3];
637     info->mallocs      = irecv[4];
638   } else if (flag == MAT_GLOBAL_SUM) {
639     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
640     info->nz_used      = irecv[0];
641     info->nz_allocated = irecv[1];
642     info->nz_unneeded  = irecv[2];
643     info->memory       = irecv[3];
644     info->mallocs      = irecv[4];
645   }
646   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
647   info->fill_ratio_needed = 0;
648   info->factor_mallocs    = 0;
649   PetscFunctionReturn(0);
650 }
651 
652 #undef __FUNCT__
653 #define __FUNCT__ "MatSetOption_MPIDense"
654 int MatSetOption_MPIDense(Mat A,MatOption op)
655 {
656   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
657   int          ierr;
658 
659   PetscFunctionBegin;
660   switch (op) {
661   case MAT_NO_NEW_NONZERO_LOCATIONS:
662   case MAT_YES_NEW_NONZERO_LOCATIONS:
663   case MAT_NEW_NONZERO_LOCATION_ERR:
664   case MAT_NEW_NONZERO_ALLOCATION_ERR:
665   case MAT_COLUMNS_SORTED:
666   case MAT_COLUMNS_UNSORTED:
667     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
668     break;
669   case MAT_ROW_ORIENTED:
670     a->roworiented = PETSC_TRUE;
671     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
672     break;
673   case MAT_ROWS_SORTED:
674   case MAT_ROWS_UNSORTED:
675   case MAT_YES_NEW_DIAGONALS:
676   case MAT_USE_HASH_TABLE:
677   case MAT_USE_SINGLE_PRECISION_SOLVES:
678     PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
679     break;
680   case MAT_COLUMN_ORIENTED:
681     a->roworiented = PETSC_FALSE;
682     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
683     break;
684   case MAT_IGNORE_OFF_PROC_ENTRIES:
685     a->donotstash = PETSC_TRUE;
686     break;
687   case MAT_NO_NEW_DIAGONALS:
688     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
689   default:
690     SETERRQ(PETSC_ERR_SUP,"unknown option");
691   }
692   PetscFunctionReturn(0);
693 }
694 
695 #undef __FUNCT__
696 #define __FUNCT__ "MatGetRow_MPIDense"
697 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v)
698 {
699   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
700   int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;
701 
702   PetscFunctionBegin;
703   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows currently supported");
704   lrow = row - rstart;
705   ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
706   PetscFunctionReturn(0);
707 }
708 
709 #undef __FUNCT__
710 #define __FUNCT__ "MatRestoreRow_MPIDense"
711 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
712 {
713   int ierr;
714 
715   PetscFunctionBegin;
716   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
717   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
718   PetscFunctionReturn(0);
719 }
720 
721 #undef __FUNCT__
722 #define __FUNCT__ "MatDiagonalScale_MPIDense"
723 int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
724 {
725   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
726   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
727   PetscScalar  *l,*r,x,*v;
728   int          ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;
729 
730   PetscFunctionBegin;
731   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
732   if (ll) {
733     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
734     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
735     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
736     for (i=0; i<m; i++) {
737       x = l[i];
738       v = mat->v + i;
739       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
740     }
741     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
742     PetscLogFlops(n*m);
743   }
744   if (rr) {
745     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
746     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector non-conforming local size");
747     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
748     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
749     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
750     for (i=0; i<n; i++) {
751       x = r[i];
752       v = mat->v + i*m;
753       for (j=0; j<m; j++) { (*v++) *= x;}
754     }
755     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
756     PetscLogFlops(n*m);
757   }
758   PetscFunctionReturn(0);
759 }
760 
761 #undef __FUNCT__
762 #define __FUNCT__ "MatNorm_MPIDense"
763 int MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
764 {
765   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
766   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
767   int          ierr,i,j;
768   PetscReal    sum = 0.0;
769   PetscScalar  *v = mat->v;
770 
771   PetscFunctionBegin;
772   if (mdn->size == 1) {
773     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
774   } else {
775     if (type == NORM_FROBENIUS) {
776       for (i=0; i<mdn->A->n*mdn->A->m; i++) {
777 #if defined(PETSC_USE_COMPLEX)
778         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
779 #else
780         sum += (*v)*(*v); v++;
781 #endif
782       }
783       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
784       *nrm = sqrt(*nrm);
785       PetscLogFlops(2*mdn->A->n*mdn->A->m);
786     } else if (type == NORM_1) {
787       PetscReal *tmp,*tmp2;
788       ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
789       tmp2 = tmp + A->N;
790       ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
791       *nrm = 0.0;
792       v = mat->v;
793       for (j=0; j<mdn->A->n; j++) {
794         for (i=0; i<mdn->A->m; i++) {
795           tmp[j] += PetscAbsScalar(*v);  v++;
796         }
797       }
798       ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
799       for (j=0; j<A->N; j++) {
800         if (tmp2[j] > *nrm) *nrm = tmp2[j];
801       }
802       ierr = PetscFree(tmp);CHKERRQ(ierr);
803       PetscLogFlops(A->n*A->m);
804     } else if (type == NORM_INFINITY) { /* max row norm */
805       PetscReal ntemp;
806       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
807       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
808     } else {
809       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
810     }
811   }
812   PetscFunctionReturn(0);
813 }
814 
815 #undef __FUNCT__
816 #define __FUNCT__ "MatTranspose_MPIDense"
817 int MatTranspose_MPIDense(Mat A,Mat *matout)
818 {
819   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
820   Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
821   Mat          B;
822   int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
823   int          j,i,ierr;
824   PetscScalar  *v;
825 
826   PetscFunctionBegin;
827   if (!matout && M != N) {
828     SETERRQ(PETSC_ERR_SUP,"Supports in-place transpose only for square matrices");
829   }
830   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);
831 
832   m = a->A->m; n = a->A->n; v = Aloc->v;
833   ierr = PetscMalloc((m+1)*sizeof(int),&rwork);CHKERRQ(ierr);
834   for (j=0; j<n; j++) {
835     for (i=0; i<m; i++) rwork[i] = rstart + i;
836     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
837     v   += m;
838   }
839   ierr = PetscFree(rwork);CHKERRQ(ierr);
840   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
841   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
842   if (matout) {
843     *matout = B;
844   } else {
845     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
846   }
847   PetscFunctionReturn(0);
848 }
849 
850 #include "petscblaslapack.h"
851 #undef __FUNCT__
852 #define __FUNCT__ "MatScale_MPIDense"
853 int MatScale_MPIDense(PetscScalar *alpha,Mat inA)
854 {
855   Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
856   Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
857   int          one = 1,nz;
858 
859   PetscFunctionBegin;
860   nz = inA->m*inA->N;
861   BLscal_(&nz,alpha,a->v,&one);
862   PetscLogFlops(nz);
863   PetscFunctionReturn(0);
864 }
865 
866 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
867 EXTERN int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);
868 
869 #undef __FUNCT__
870 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
871 int MatSetUpPreallocation_MPIDense(Mat A)
872 {
873   int        ierr;
874 
875   PetscFunctionBegin;
876   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
877   PetscFunctionReturn(0);
878 }
879 
880 /* -------------------------------------------------------------------*/
881 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
882        MatGetRow_MPIDense,
883        MatRestoreRow_MPIDense,
884        MatMult_MPIDense,
885        MatMultAdd_MPIDense,
886        MatMultTranspose_MPIDense,
887        MatMultTransposeAdd_MPIDense,
888        0,
889        0,
890        0,
891        0,
892        0,
893        0,
894        0,
895        MatTranspose_MPIDense,
896        MatGetInfo_MPIDense,0,
897        MatGetDiagonal_MPIDense,
898        MatDiagonalScale_MPIDense,
899        MatNorm_MPIDense,
900        MatAssemblyBegin_MPIDense,
901        MatAssemblyEnd_MPIDense,
902        0,
903        MatSetOption_MPIDense,
904        MatZeroEntries_MPIDense,
905        MatZeroRows_MPIDense,
906        0,
907        0,
908        0,
909        0,
910        MatSetUpPreallocation_MPIDense,
911        0,
912        0,
913        MatGetArray_MPIDense,
914        MatRestoreArray_MPIDense,
915        MatDuplicate_MPIDense,
916        0,
917        0,
918        0,
919        0,
920        0,
921        MatGetSubMatrices_MPIDense,
922        0,
923        MatGetValues_MPIDense,
924        0,
925        0,
926        MatScale_MPIDense,
927        0,
928        0,
929        0,
930        MatGetBlockSize_MPIDense,
931        0,
932        0,
933        0,
934        0,
935        0,
936        0,
937        0,
938        0,
939        0,
940        MatGetSubMatrix_MPIDense,
941        MatDestroy_MPIDense,
942        MatView_MPIDense,
943        MatGetPetscMaps_Petsc};
944 
945 EXTERN_C_BEGIN
946 #undef __FUNCT__
947 #define __FUNCT__ "MatCreate_MPIDense"
948 int MatCreate_MPIDense(Mat mat)
949 {
950   Mat_MPIDense *a;
951   int          ierr,i;
952 
953   PetscFunctionBegin;
954   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
955   mat->data         = (void*)a;
956   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
957   mat->factor       = 0;
958   mat->mapping      = 0;
959 
960   a->factor       = 0;
961   mat->insertmode = NOT_SET_VALUES;
962   ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
963   ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);
964 
965   ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
966   ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
967   a->nvec = mat->n;
968 
969   /* the information in the maps duplicates the information computed below, eventually
970      we should remove the duplicate information that is not contained in the maps */
971   ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
972   ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);
973 
974   /* build local table of row and column ownerships */
975   ierr       = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
976   a->cowners = a->rowners + a->size + 1;
977   PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
978   ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
979   a->rowners[0] = 0;
980   for (i=2; i<=a->size; i++) {
981     a->rowners[i] += a->rowners[i-1];
982   }
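  /* for example, with 3 processes owning 3, 3 and 4 local rows, rowners becomes
     {0,3,6,10}, so process 1 owns global rows [3,6) */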
983   a->rstart = a->rowners[a->rank];
984   a->rend   = a->rowners[a->rank+1];
985   ierr      = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
986   a->cowners[0] = 0;
987   for (i=2; i<=a->size; i++) {
988     a->cowners[i] += a->cowners[i-1];
989   }
990 
991   /* build cache (stash) for entries destined for other processes */
992   a->donotstash = PETSC_FALSE;
993   ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);
994 
995   /* stuff used for matrix vector multiply */
996   a->lvec        = 0;
997   a->Mvctx       = 0;
998   a->roworiented = PETSC_TRUE;
999 
1000   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1001                                      "MatGetDiagonalBlock_MPIDense",
1002                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1003   PetscFunctionReturn(0);
1004 }
1005 EXTERN_C_END
1006 
1007 #undef __FUNCT__
1008 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1009 /*@C
1010    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1011 
1012    Not collective
1013 
1014    Input Parameters:
1015 +  A - the matrix
1016 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1017    to control all matrix memory allocation.
1018 
1019    Notes:
1020    The dense format is fully compatible with standard Fortran 77
1021    storage by columns.
1022 
1023    The data input variable is intended primarily for Fortran programmers
1024    who wish to allocate their own matrix memory space.  Most users should
1025    set data=PETSC_NULL.
1026 
1027    Level: intermediate
1028 
1029 .keywords: matrix,dense, parallel
1030 
1031 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1032 @*/
1033 int MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1034 {
1035   Mat_MPIDense *a;
1036   int          ierr;
1037   PetscTruth   flg2;
1038 
1039   PetscFunctionBegin;
1040   ierr = PetscTypeCompare((PetscObject)mat,MATMPIDENSE,&flg2);CHKERRQ(ierr);
1041   if (!flg2) PetscFunctionReturn(0);
1042   mat->preallocated = PETSC_TRUE;
1043   /* Note:  For now, when data is specified above, this assumes the user correctly
1044    allocates the local dense storage space.  We should add error checking. */
1045 
1046   a    = (Mat_MPIDense*)mat->data;
1047   ierr = MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);CHKERRQ(ierr);
1048   PetscLogObjectParent(mat,a->A);
1049   PetscFunctionReturn(0);
1050 }
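/*
   A hedged usage sketch (not part of the original source): the preallocation routine is
   normally called right after the matrix is created and its type is set; passing
   PETSC_NULL lets PETSc allocate the local column-major storage.

     Mat A;
     int ierr;

     ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
*/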
1051 
1052 #undef __FUNCT__
1053 #define __FUNCT__ "MatCreateMPIDense"
1054 /*@C
1055    MatCreateMPIDense - Creates a parallel matrix in dense format.
1056 
1057    Collective on MPI_Comm
1058 
1059    Input Parameters:
1060 +  comm - MPI communicator
1061 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1062 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1063 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1064 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1065 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1066    to control all matrix memory allocation.
1067 
1068    Output Parameter:
1069 .  A - the matrix
1070 
1071    Notes:
1072    The dense format is fully compatible with standard Fortran 77
1073    storage by columns.
1074 
1075    The data input variable is intended primarily for Fortran programmers
1076    who wish to allocate their own matrix memory space.  Most users should
1077    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1078 
1079    The user MUST specify either the local or global matrix dimensions
1080    (possibly both).
1081 
1082    Level: intermediate
1083 
1084 .keywords: matrix,dense, parallel
1085 
1086 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1087 @*/
1088 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A)
1089 {
1090   int ierr,size;
1091 
1092   PetscFunctionBegin;
1093   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1094   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1095   if (size > 1) {
1096     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1097     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1098   } else {
1099     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1100     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1101   }
1102   PetscFunctionReturn(0);
1103 }
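/*
   A hedged usage sketch (not part of the original source): creating a 10 x 10 parallel
   dense matrix with PETSc-managed storage, inserting one (possibly off-processor) entry,
   and assembling it.

     Mat         A;
     PetscScalar one = 1.0;
     int         ierr,row = 0,col = 0;

     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,10,10,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatSetValues(A,1,&row,1,&col,&one,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/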
1104 
1105 #undef __FUNCT__
1106 #define __FUNCT__ "MatDuplicate_MPIDense"
1107 static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1108 {
1109   Mat          mat;
1110   Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1111   int          ierr;
1112 
1113   PetscFunctionBegin;
1114   *newmat       = 0;
1115   ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
1116   ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr);
1117   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1118   mat->data         = (void*)a;
1119   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1120   mat->factor       = A->factor;
1121   mat->assembled    = PETSC_TRUE;
1122   mat->preallocated = PETSC_TRUE;
1123 
1124   a->rstart       = oldmat->rstart;
1125   a->rend         = oldmat->rend;
1126   a->size         = oldmat->size;
1127   a->rank         = oldmat->rank;
1128   mat->insertmode = NOT_SET_VALUES;
1129   a->nvec         = oldmat->nvec;
1130   a->donotstash   = oldmat->donotstash;
1131   ierr            = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);CHKERRQ(ierr);
1132   PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1133   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
1134   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1135 
1136   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1137   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1138   PetscLogObjectParent(mat,a->A);
1139   *newmat = mat;
1140   PetscFunctionReturn(0);
1141 }
1142 
1143 #include "petscsys.h"
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1147 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
1148 {
1149   int          *rowners,i,size,rank,m,ierr,nz,j;
1150   PetscScalar  *array,*vals,*vals_ptr;
1151   MPI_Status   status;
1152 
1153   PetscFunctionBegin;
1154   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1155   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1156 
1157   /* determine ownership of all rows */
1158   m          = M/size + ((M % size) > rank);
1159   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1160   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1161   rowners[0] = 0;
1162   for (i=2; i<=size; i++) {
1163     rowners[i] += rowners[i-1];
1164   }
1165 
1166   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1167   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1168 
1169   if (!rank) {
1170     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1171 
1172     /* read in my part of the matrix numerical values  */
1173     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1174 
1175     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1176     vals_ptr = vals;
1177     for (i=0; i<m; i++) {
1178       for (j=0; j<N; j++) {
1179         array[i + j*m] = *vals_ptr++;
1180       }
1181     }
1182 
1183     /* read in the other processors' parts and ship them out */
1184     for (i=1; i<size; i++) {
1185       nz   = (rowners[i+1] - rowners[i])*N;
1186       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1187       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1188     }
1189   } else {
1190     /* receive numeric values */
1191     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1192 
1193     /* receive message of values */
1194     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1195 
1196     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1197     vals_ptr = vals;
1198     for (i=0; i<m; i++) {
1199       for (j=0; j<N; j++) {
1200         array[i + j*m] = *vals_ptr++;
1201       }
1202     }
1203   }
1204   ierr = PetscFree(rowners);CHKERRQ(ierr);
1205   ierr = PetscFree(vals);CHKERRQ(ierr);
1206   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1207   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1208   PetscFunctionReturn(0);
1209 }
1210 
1211 EXTERN_C_BEGIN
1212 #undef __FUNCT__
1213 #define __FUNCT__ "MatLoad_MPIDense"
1214 int MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
1215 {
1216   Mat          A;
1217   PetscScalar  *vals,*svals;
1218   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1219   MPI_Status   status;
1220   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1221   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1222   int          tag = ((PetscObject)viewer)->tag;
1223   int          i,nz,ierr,j,rstart,rend,fd;
1224 
1225   PetscFunctionBegin;
1226   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1227   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1228   if (!rank) {
1229     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1230     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1231     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1232   }
1233 
1234   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1235   M = header[1]; N = header[2]; nz = header[3];
1236 
1237   /*
1238        Handle case where matrix is stored on disk as a dense matrix
1239   */
1240   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1241     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1242     PetscFunctionReturn(0);
1243   }
1244 
1245   /* determine ownership of all rows */
1246   m          = M/size + ((M % size) > rank);
1247   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1248   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1249   rowners[0] = 0;
1250   for (i=2; i<=size; i++) {
1251     rowners[i] += rowners[i-1];
1252   }
1253   rstart = rowners[rank];
1254   rend   = rowners[rank+1];
1255 
1256   /* distribute row lengths to all processors */
1257   ierr    = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);CHKERRQ(ierr);
1258   offlens = ourlens + (rend-rstart);
1259   if (!rank) {
1260     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1261     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1262     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1263     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1264     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1265     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1266   } else {
1267     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1268   }
1269 
1270   if (!rank) {
1271     /* calculate the number of nonzeros on each processor */
1272     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1273     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1274     for (i=0; i<size; i++) {
1275       for (j=rowners[i]; j< rowners[i+1]; j++) {
1276         procsnz[i] += rowlengths[j];
1277       }
1278     }
1279     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1280 
1281     /* determine max buffer needed and allocate it */
1282     maxnz = 0;
1283     for (i=0; i<size; i++) {
1284       maxnz = PetscMax(maxnz,procsnz[i]);
1285     }
1286     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1287 
1288     /* read in my part of the matrix column indices  */
1289     nz = procsnz[0];
1290     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1291     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1292 
1293     /* read in everyone else's part and ship it off */
1294     for (i=1; i<size; i++) {
1295       nz   = procsnz[i];
1296       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1297       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1298     }
1299     ierr = PetscFree(cols);CHKERRQ(ierr);
1300   } else {
1301     /* determine buffer space needed for message */
1302     nz = 0;
1303     for (i=0; i<m; i++) {
1304       nz += ourlens[i];
1305     }
1306     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1307 
1308     /* receive message of column indices */
1309     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1310     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1311     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1312   }
1313 
1314   /* loop over local rows, determining the number of off-diagonal entries */
1315   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1316   jj = 0;
1317   for (i=0; i<m; i++) {
1318     for (j=0; j<ourlens[i]; j++) {
1319       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1320       jj++;
1321     }
1322   }
1323 
1324   /* create our matrix */
1325   for (i=0; i<m; i++) {
1326     ourlens[i] -= offlens[i];
1327   }
1328   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1329   A = *newmat;
1330   for (i=0; i<m; i++) {
1331     ourlens[i] += offlens[i];
1332   }
1333 
1334   if (!rank) {
1335     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1336 
1337     /* read in my part of the matrix numerical values  */
1338     nz = procsnz[0];
1339     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1340 
1341     /* insert into matrix */
1342     jj      = rstart;
1343     smycols = mycols;
1344     svals   = vals;
1345     for (i=0; i<m; i++) {
1346       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1347       smycols += ourlens[i];
1348       svals   += ourlens[i];
1349       jj++;
1350     }
1351 
1352     /* read in the other processors' parts and ship them out */
1353     for (i=1; i<size; i++) {
1354       nz   = procsnz[i];
1355       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1356       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1357     }
1358     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1359   } else {
1360     /* receive numeric values */
1361     ierr = PetscMalloc(nz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1362 
1363     /* receive message of values */
1364     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1365     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1366     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1367 
1368     /* insert into matrix */
1369     jj      = rstart;
1370     smycols = mycols;
1371     svals   = vals;
1372     for (i=0; i<m; i++) {
1373       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1374       smycols += ourlens[i];
1375       svals   += ourlens[i];
1376       jj++;
1377     }
1378   }
1379   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1380   ierr = PetscFree(vals);CHKERRQ(ierr);
1381   ierr = PetscFree(mycols);CHKERRQ(ierr);
1382   ierr = PetscFree(rowners);CHKERRQ(ierr);
1383 
1384   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1385   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1386   PetscFunctionReturn(0);
1387 }
1388 EXTERN_C_END
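/*
   A hedged usage sketch (not part of the original source): MatLoad_MPIDense() is reached
   through MatLoad() with a binary PetscViewer that has already been opened for reading
   (how the viewer is opened depends on the PETSc version in use).

     Mat A;
     int ierr;

     ierr = MatLoad(viewer,MATMPIDENSE,&A);CHKERRQ(ierr);
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/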
1389 
1390 
1391 
1392 
1393 
1394