xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision ca3fa75bd98f564f8d45d53ada4cc05613614b83)
#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpidense.c,v 1.118 1999/07/26 01:16:26 curfman Exp curfman $";
#endif

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

extern int MatSetUpMultiply_MPIDense(Mat);

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
  int          roworiented = A->roworiented;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for ( j=0; j<n; j++ ) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if ( !A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
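
/*
   Note on the value layout handled above (added for clarity, derived from
   the indexing in the code): for m rows idxm[] and n columns idxn[], a
   row-oriented v supplies entry (idxm[i],idxn[j]) at v[i*n+j], while a
   column-oriented v supplies it at v[i+j*m].  For example, with m=n=2 and
   roworiented set, v = {a,b,c,d} places a,b in row idxm[0] and c,d in
   row idxm[1].
*/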

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= mdn->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIDense"
static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data, *newmatd;
  Mat_SeqDense *lmat = (Mat_SeqDense *) mat->A->data;
  int          i, j, ierr, *irow, *icol, nrows, ncols, nlrows, nlcols;
  Scalar       *av, *bv, *v = lmat->v;
  Mat          newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK ... temporarily just support retaining all rows on each proc. */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  if (nlrows != nrows) SETERRQ(PETSC_ERR_ARG_SIZ,0,
    "Currently supports only submatrices containing ALL local rows of the original matrix (same partitioning)");

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    int n_cols,n_rows;
    ierr = MatGetSize(*B,&n_rows,&n_cols);CHKERRQ(ierr);
    if (n_rows != nrows || n_cols != ncols) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Reused submatrix wrong size");
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreateMPIDense(A->comm,nrows,ncols,PETSC_DECIDE,PETSC_DECIDE,PETSC_NULL,&newmat);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *) newmat->data;
  bv = ((Mat_SeqDense *)newmatd->A->data)->v;

  for ( i=0; i<ncols; i++ ) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++ ) {
      *bv++ = av[irow[j]];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}
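
/*
   Illustrative usage sketch (added; not part of the original source,
   assuming the public MatGetSubMatrix() entry point with this calling
   sequence dispatches here): pull out two columns of A while keeping every
   locally owned row.  Note that the row index set is interpreted in the
   LOCAL numbering of this processor's rows (the loop above indexes
   av[irow[j]] into the local array), and the number of rows requested must
   equal the number of local rows.

      IS  isrow, iscol;
      Mat sub;
      int i, ierr, nlrows, nlcols, *rows, cols[2] = {0, 1};

      ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
      rows = (int *) PetscMalloc((nlrows+1)*sizeof(int));CHKPTRQ(rows);
      for ( i=0; i<nlrows; i++ ) rows[i] = i;
      ierr = ISCreateGeneral(PETSC_COMM_SELF,nlrows,rows,&isrow);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PETSC_COMM_SELF,2,cols,&iscol);CHKERRQ(ierr);
      ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
*/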

#undef __FUNC__
#define __FUNC__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",
           nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  Scalar       *val;
  InsertMode   addv=mat->insertmode;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for ( i=0; i<n; ) {
      /* Now identify the consecutive vals belonging to the same row */
      for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
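
/*
   Illustrative sketch (added; not part of the original source) of the
   calling sequence that drives the two assembly routines above: values
   destined for off-processor rows are stashed by MatSetValues_MPIDense(),
   shipped out in MatAssemblyBegin_MPIDense(), and applied on their owners
   in MatAssemblyEnd_MPIDense().

      Scalar one = 1.0;
      int    row = 0, col = 0, ierr;

      ierr = MatSetValues(A,1,&row,1,&col,&one,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/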

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the row and column ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   the local matrix mdn->A directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  PetscFunctionBegin;
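  /*
     Overview of the communication below (summary added for clarity):
       1) count how many of the requested rows belong to each processor;
       2) two MPI_Allreduce() calls tell every processor how many messages
          it will receive and the maximum message length;
       3) each row index is shipped to its owner with nonblocking
          sends/receives;
       4) the received global indices are converted to local ones and
          zeroed through MatZeroRows() on the local sequential matrix.
  */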
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work   = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
  ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  ierr = PetscFree(work);CHKERRQ(ierr);

  /* post receives */
  rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
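
/*
   Illustrative usage sketch (added; not part of the original source):
   zeroing two global rows and placing 1.0 on their diagonals, subject to
   the diagonal caveat noted above.  Each processor may pass any subset of
   the global rows; the routine routes each row to its owner.

      IS     is;
      int    rows[2] = {0, 1}, ierr;
      Scalar one = 1.0;

      ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is);CHKERRQ(ierr);
      ierr = MatZeroRows(A,is,&one);CHKERRQ(ierr);
      ierr = ISDestroy(is);CHKERRQ(ierr);
*/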

#undef __FUNC__
#define __FUNC__ "MatMult_MPIDense"
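/*
   Summary added for clarity: each processor stores all N columns of its
   m local rows, so y = A*x is computed by first gathering the entire
   distributed vector x into the local work vector lvec through the Mvctx
   scatter and then applying the local m x N dense block.
*/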
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIDense"
int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;
  Scalar       zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIDense"
int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
  int          ierr, len, i, n, m = a->m, radd;
  Scalar       *x, zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
  len = PetscMin(aloc->m,aloc->n);
  radd = a->rstart*m;
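  /*
     Indexing note (added for clarity): the local array is stored by
     columns, so local entry (i,j) lives at v[i + j*m].  The i-th local
     diagonal entry is global row rstart+i, i.e. local entry (i,rstart+i),
     giving the offset i + (rstart+i)*m = radd + i*m + i used below.
  */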
  for ( i=0; i<len; i++ ) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)   VecDestroy(mdn->lvec);
  if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  }
  else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_ASCII"
static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format, size = mdn->size, rank = mdn->rank;
  FILE         *fd;
  ViewerType   vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
  ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
  if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
    MatInfo info;
    ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
      fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
         (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
    ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (format == VIEWER_FORMAT_ASCII_INFO) {
    PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
    Scalar       *vals;
    Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = Amdn->m;
    for ( i=0; i<m; i++ ) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer);CHKERRQ(ierr);
    }
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,Viewer viewer)
{
  int          ierr;
  ViewerType   vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
    ierr = MatView_MPIDense_ASCII(mat,viewer);CHKERRQ(ierr);
  } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ(1,1,"Viewer type not supported by PETSc object");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  Mat          mdn = mat->A;
  int          ierr;
  double       isend[5], irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
   extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
   extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
   extern int MatSolve_MPIDense(Mat,Vec,Vec);
   extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
   extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
   extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_NEW_NONZERO_LOCATION_ERR ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_COLUMNS_UNSORTED) {
        MatSetOption(a->A,op);
  } else if (op == MAT_ROW_ORIENTED) {
        a->roworiented = 1;
        MatSetOption(a->A,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS ||
             op == MAT_USE_HASH_TABLE) {
    PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0; MatSetOption(a->A,op);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIDense"
int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->M; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIDense"
int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIDense"
int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->rstart; *n = mat->rend;
  PetscFunctionReturn(0);
}
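
/*
   Illustrative sketch (added; not part of the original source): the usual
   way to use the ownership range is to loop over the locally owned rows.

      int ierr, rstart, rend, row;

      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for ( row=rstart; row<rend; row++ ) {
        ... work with locally owned global row row ...
      }
*/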

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  int          lrow, rstart = mat->rstart, rend = mat->rend,ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  int ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIDense"
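/*
   Definitions added for clarity: the Frobenius norm sqrt(sum_ij |a_ij|^2)
   is formed by an MPI_SUM reduction of the local sums of squares; the
   1-norm max_j sum_i |a_ij| reduces the per-column absolute sums and then
   takes the maximum; the infinity norm max_i sum_j |a_ij| reduces the
   local row norms with MPI_MAX.  The 2-norm is not supported in parallel.
*/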
int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr, i, j;
  double       sum = 0.0;
  Scalar       *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr =  MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mat->n*mat->m; i++ ) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    } else if (type == NORM_1) {
      double *tmp, *tmp2;
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) );CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      ierr = PetscMemzero(tmp,2*mdn->N*sizeof(double));CHKERRQ(ierr);
      *norm = 0.0;
      v = mat->v;
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      PLogFlops(mat->n*mat->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
  Mat          B;
  int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
  int          j, i, ierr;
  Scalar       *v;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = Aloc->m; n = Aloc->n; v = Aloc->v;
  rwork = (int *) PetscMalloc((m+1)*sizeof(int));CHKPTRQ(rwork); /* m entries are filled below, not n */
  for ( i=0; i<m; i++ ) rwork[i] = rstart + i;
  for ( j=0; j<n; j++ ) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps *Abops;
    MatOps   Aops;

    /* This isn't really an in-place transpose, but free data struct from a */
    ierr = PetscFree(a->rowners);CHKERRQ(ierr);
    ierr = MatDestroy(a->A);CHKERRQ(ierr);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    ierr = PetscFree(a);CHKERRQ(ierr);

    /*
         This is horrible, horrible code. We need to keep the
      A pointers for the bops and ops but copy everything
      else from B.
    */
    Abops   = A->bops;
    Aops    = A->ops;
    ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
    A->bops = Abops;
    A->ops  = Aops;

    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#include "pinclude/blaslapack.h"
#undef __FUNC__
#define __FUNC__ "MatScale_MPIDense"
int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
  Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
  int          one = 1, nz;

  PetscFunctionBegin;
  nz = a->m*a->n;
  BLscal_( &nz, alpha, a->v, &one );
  PLogFlops(nz);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
       MatMultAdd_MPIDense,
       MatMultTrans_MPIDense,
       MatMultTransAdd_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,
       0,
       MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
       MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
       MatGetSize_MPIDense,
       MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
       MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
       0,
       MatScale_MPIDense,
       0,
       0,
       0,
       MatGetBlockSize_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrix_MPIDense,
       0,
       0,
       MatGetMaps_Petsc};

#undef __FUNC__
#define __FUNC__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Currently, the only parallel dense matrix decomposition is by rows,
   so that n=N and each submatrix owns all of the global columns.

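   Example (an illustrative sketch added to this page, not a prescription):
   create a distributed 100 x 50 dense matrix, letting PETSc choose the row
   partitioning and allocate all storage
$      Mat A;
$      int ierr;
$      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
$                               100,50,PETSC_NULL,&A);
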
   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i,flg;

  PetscFunctionBegin;
  /* Note:  For now, when data is specified above, this assumes the user correctly
   allocates the local dense storage space.  We should add error checking. */

  *A = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = 0;
  mat->mapping      = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);

  /*
     The computation of n below is wrong: n should represent the number of
     local entries in a right-hand (column) vector that the matrix is
     multiplied against.
  */

  /* each row stores all columns */
  if (N == PETSC_DECIDE) N = n;
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  /*  if (n != N) SETERRQ(PETSC_ERR_SUP,0,"For now, only n=N is supported"); */
  a->N = mat->N = N;
  a->M = mat->M = M;
  a->m = mat->m = m;
  a->n = mat->n = n;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,PETSC_DECIDE,N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr      = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }

  ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off array entries formed */
  a->donotstash = 0;
  ierr = MatStashCreate_Private(comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = 1;

  *A = mat;
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
  int          ierr;
  FactorCtx    *factor;

  PetscFunctionBegin;
  *newmat       = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx));CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;
  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
{
  int        *rowners, i,size,rank,m,ierr,nz,j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors and ship out */
    for ( i=1; i<size; i++ ) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense"
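/*
   Binary layout assumed by the loader below (reconstructed from the reads
   it performs): an int header {MAT_COOKIE, M, N, nz}.  If nz equals the
   flag MATRIX_BINARY_FORMAT_DENSE, the values follow as M*N Scalars stored
   by row and are handled by MatLoad_MPIDense_DenseInFile(); otherwise the
   file continues in the sparse format with M row lengths, then all column
   indices, then all numerical values.
*/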
int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;
  int          i, nz, ierr, j,rstart, rend, fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
    ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}