#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpidense.c,v 1.112 1999/04/19 22:11:55 bsmith Exp balay $";
#endif

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

extern int MatSetUpMultiply_MPIDense(Mat);

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
  int          roworiented = A->roworiented;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* column-oriented input: v[i+j*m] holds the value for (idxm[i],idxn[j]) */
        for ( j=0; j<n; j++ ) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* off-processor entry: cache it in the stash for communication at assembly time */
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= mdn->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are using either INSERT_VALUES or ADD_VALUES;
     the bitwise OR over all processes reveals a mix of the two modes */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",
           nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  Scalar       *val;
  InsertMode   addv=mat->insertmode;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for ( i=0; i<n; ) {
      /* Now identify the consecutive vals belonging to the same row */
      for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   the local matrix mdn->A directly rather than going through the
   MatZeroRows() routine.
*/
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* N+1 avoids a zero-length malloc */
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work   = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
  ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  PetscFree(work);

  /* post receives:   */
  rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
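  /* Illustrative example (not executed): with size=3 and per-processor row
     counts nprocs = {2,0,3}, the exclusive prefix sum gives starts = {0,2,2},
     so rows destined for processor 0 are packed at svalues[0..1] and those
     for processor 2 at svalues[2..4]. The packing loop below advances
     starts[owner[i]] as it goes, which is why starts is rebuilt afterwards
     before the sends are posted. */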
  svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /*  wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIDense"
int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;
  Scalar       zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIDense"
int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
  int          ierr, len, i, n, m = a->m, radd;
  Scalar       *x, zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
  len  = PetscMin(aloc->m,aloc->n);
  radd = a->rstart*m; /* offset of the diagonal block in the column-major local array */
  for ( i=0; i<len; i++ ) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (--mat->refct > 0) PetscFunctionReturn(0);

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
#if defined(USE_PETSC_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  PetscFree(mdn->rowners);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)   VecDestroy(mdn->lvec);
  if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   PetscFree(mdn->factor->temp);
    if (mdn->factor->tag)    PetscFree(mdn->factor->tag);
    if (mdn->factor->pivots) PetscFree(mdn->factor->pivots);
    PetscFree(mdn->factor);
  }
  PetscFree(mdn);
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_ASCII"
static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format, size = mdn->size, rank = mdn->rank;
  FILE         *fd;
  ViewerType   vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
  ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
  if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
    MatInfo info;
    ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
      fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
              (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
    ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (format == VIEWER_FORMAT_ASCII_INFO) {
    PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor. */
    Mat          A;
    int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
    Scalar       *vals;
    Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* Copy the matrix row by row ... this isn't the most efficient approach,
       but it's quick for now */
    row = mdn->rstart; m = Amdn->m;
    for ( i=0; i<m; i++ ) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer);CHKERRQ(ierr);
    }
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,Viewer viewer)
{
  int          ierr;
  ViewerType   vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
    ierr = MatView_MPIDense_ASCII(mat,viewer);CHKERRQ(ierr);
  } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ(1,1,"Viewer type not supported by PETSc object");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  Mat          mdn = mat->A;
  int          ierr;
  double       isend[5], irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
   extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
   extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
   extern int MatSolve_MPIDense(Mat,Vec,Vec);
   extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
   extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
   extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_NEW_NONZERO_LOCATION_ERR ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_COLUMNS_UNSORTED) {
    MatSetOption(a->A,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS ||
             op == MAT_USE_HASH_TABLE) {
    PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIDense"
int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->M; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIDense"
int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIDense"
int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->rstart; *n = mat->rend;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  int          lrow, rstart = mat->rstart, rend = mat->rend,ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  PetscFunctionBegin;
  if (idx) PetscFree(*idx);
  if (v) PetscFree(*v);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr, i, j;
  double       sum = 0.0;
  Scalar       *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mat->n*mat->m; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    } else if (type == NORM_1) { /* max column sum */
      double *tmp, *tmp2;
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) );CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      ierr = PetscMemzero(tmp,2*mdn->N*sizeof(double));CHKERRQ(ierr);
      *norm = 0.0;
      v = mat->v;
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp);
      PLogFlops(mat->n*mat->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
  Mat          B;
  int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
  int          j, i, ierr;
  Scalar       *v;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = Aloc->m; n = Aloc->n; v = Aloc->v;
  rwork = (int *) PetscMalloc(m*sizeof(int));CHKPTRQ(rwork);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for ( j=0; j<n; j++ ) {
    /* local column j of A (every process stores all columns) becomes row j of B */
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  PetscFree(rwork);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps *Abops;
    MatOps   Aops;

    /* This isn't really an in-place transpose; we free the data structures of a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A);CHKERRQ(ierr);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);

    /*
         This is horrible, horrible code. We need to keep the
      A pointers for the bops and ops but copy everything
      else from B.
    */
    Abops   = A->bops;
    Aops    = A->ops;
    ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
    A->bops = Abops;
    A->ops  = Aops;

    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#include "pinclude/blaslapack.h"
#undef __FUNC__
#define __FUNC__ "MatScale_MPIDense"
int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
  Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
  int          one = 1, nz;

  PetscFunctionBegin;
  nz = a->m*a->n;
  BLscal_( &nz, alpha, a->v, &one );
  PLogFlops(nz);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
       MatMultAdd_MPIDense,
       MatMultTrans_MPIDense,
       MatMultTransAdd_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
       MatGetInfo_MPIDense,
       0,
       MatGetDiagonal_MPIDense,
       0,
       MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
       MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
       MatGetSize_MPIDense,
       MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
       MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
       0,
       MatScale_MPIDense,
       0,
       0,
       0,
       MatGetBlockSize_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatGetMaps_Petsc};

#undef __FUNC__
#define __FUNC__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Currently, the only parallel dense matrix decomposition is by rows,
   so that n=N and each submatrix owns all of the global columns.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
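/*
   A minimal usage sketch (illustrative only, not compiled here): each process
   sets values in its own rows; values set in rows owned by other processes
   would be routed through the stash during assembly. Assumes a square system
   of global size 100 x 100 with the row distribution left to PETSc.

     Mat    C;
     int    ierr,row,col,rstart,rend;
     Scalar one = 1.0;

     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
                              100,100,PETSC_NULL,&C);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(C,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       col  = row;   /* set the diagonal entry of each locally owned row */
       ierr = MatSetValues(C,1,&row,1,&col,&one,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/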
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i,flg;

  PetscFunctionBegin;
  /* Note:  For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space.  We should add error checking. */

  *A = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = 0;
  mat->mapping      = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);

  /*
     The computation of n below is wrong; n should be the local size of
     the right-hand (column) vector, not a copy of N.
  */

  /* each row stores all columns */
  if (N == PETSC_DECIDE) N = n;
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  /*  if (n != N) SETERRQ(PETSC_ERR_SUP,0,"For now, only n=N is supported"); */
  a->N = mat->N = N;
  a->M = mat->M = M;
  a->m = mat->m = m;
  a->n = mat->n = n;

  /* the information in the maps duplicates information computed below;
     eventually the duplicate information not contained in the maps should be removed */
  ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,PETSC_DECIDE,N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr      = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }

  /* the local block is m x N: each process stores all N columns of its rows */
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off array entries formed */
  a->donotstash = 0;
  ierr = MatStashCreate_Private(comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = 1;

  *A = mat;
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
  int          ierr;
  FactorCtx    *factor;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx));CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;
  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
{
  int        *rowners, i,size,rank,m,ierr,nz,j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values  */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix by row (the file stores values by row, but the
       local dense array is column-major, so we cannot read directly into it) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values*/
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into matrix by row (the file stores values by row, but the
       local dense array is column-major, so we cannot read directly into it) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = MatRestoreArray(*newmat,&array);CHKERRQ(ierr);
  PetscFree(rowners);
  PetscFree(vals);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense"
int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;
  int          i, nz, ierr, j, rstart, rend, fd = 0; /* fd is set (and used) only on process 0 */

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
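  /* Sketch of the binary layout consumed by this loader (inferred from the
     reads below, noted here for orientation): a 4-int header
     [MAT_COOKIE, M, N, nz]. When nz == MATRIX_BINARY_FORMAT_DENSE the values
     follow immediately, row by row, and the dense-file branch just below is
     taken; otherwise M row lengths (ints) come next, then all column indices
     (ints), then all numerical values (Scalars). */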
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
    ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j<rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with the file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix (the diagonal/off-diagonal split below has no net
     effect for the dense format; it appears to be carried over from the
     sparse loader) */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with the file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}