#ifndef lint
static char vcid[] = "$Id: mpidense.c,v 1.40 1996/04/09 20:24:27 curfman Exp curfman $";
#endif

/*
   Basic functions for parallel dense matrices.
*/

#include "mpidense.h"
#include "vec/vecimpl.h"

static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,
                                 int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
  int          roworiented = A->roworiented;

  if (A->insertmode != NOT_SET_VALUES && A->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds");
  }
  A->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row");
    if (idxm[i] >= A->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr);
      }
      else {
        for ( j=0; j<n; j++ ) {
          if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column");
          if (idxn[j] >= A->N) SETERRQ(1,"MatSetValues_MPIDense:Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
        }
      }
    }
    else {
      if (roworiented) {
        ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr);
      }
      else { /* must stash each value separately */
        row = idxm[i];
        for ( j=0; j<n; j++ ) {
          ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv);
                 CHKERRQ(ierr);
        }
      }
    }
  }
  return 0;
}

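/*
   A minimal usage sketch (not from the original source), assuming a matrix
   created with MatCreateMPIDense(): values destined for rows owned by other
   processors are stashed above and only communicated during assembly, so
   every MatSetValues() phase must be closed with an assembly pair.  The
   indices below are hypothetical.

       int    ierr, row = 0, col = 0;
       Scalar val = 1.0;
       ierr = MatSetValues(mat,1,&row,1,&col,&val,INSERT_VALUES); CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,FINAL_ASSEMBLY); CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,FINAL_ASSEMBLY); CHKERRQ(ierr);
*/
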
static int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(1,"MatGetValues_MPIDense:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative column");
        if (idxn[j] >= mdn->N)
          SETERRQ(1,"MatGetValues_MPIDense:Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr);
      }
    }
    else {
      SETERRQ(1,"MatGetValues_MPIDense:Only local values currently supported");
    }
  }
  return 0;
}

static int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  ierr = MatGetArray(a->A,array); CHKERRQ(ierr);
  return 0;
}

static int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  return 0;
}

static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
  int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int          tag = mat->tag, *owner,*starts,count,ierr;
  InsertMode   addv;
  MPI_Request  *send_waits,*recv_waits;
  Scalar       *rvalues,*svalues;

  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
  MPI_Allreduce(&mdn->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs");
  }
  mdn->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<mdn->stash.n; i++ ) {
    idx = mdn->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message consists of ordered triples
     (global row index,global column index,value); we store the global
     indices as Scalars to simplify the message passing.
       2) since we don't know how long each individual message is, we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.

       This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar) );
  CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<mdn->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar)  mdn->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar)  mdn->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] =  mdn->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);

  mdn->svalues    = svalues;    mdn->rvalues = rvalues;
  mdn->nsends     = nsends;     mdn->nrecvs = nreceives;
  mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
  mdn->rmax       = nmax;

  return 0;
}
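
/*
   The stash is shipped as (row,column,value) triples, with the integer
   indices cast to Scalar so that a single MPIU_SCALAR message suffices.
   A sketch of how entry k of a received message is unpacked (mirroring
   the packing loop above and the unpacking in MatAssemblyEnd_MPIDense):

       row = (int) PetscReal(values[3*k])   - rstart;    global row -> local
       col = (int) PetscReal(values[3*k+1]);             global column
       val = values[3*k+2];                              the value itself
*/
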
extern int MatSetUpMultiply_MPIDense(Mat);

static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Status   *send_status,recv_status;
  int          imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col;
  Scalar       *values,val;
  InsertMode   addv = mdn->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = mdn->rvalues + 3*imdex*mdn->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - mdn->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= 0 && col < mdn->N) {
        MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
      }
      else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");}
    }
    count--;
  }
  PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues);

  /* wait on sends */
  if (mdn->nsends) {
    send_status = (MPI_Status *) PetscMalloc( mdn->nsends*sizeof(MPI_Status) );
    CHKPTRQ(send_status);
    MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(mdn->send_waits); PetscFree(mdn->svalues);

  mdn->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);

  if (!mat->was_assembled && mode == FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
  }
  return 0;
}

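/*
   Since the receives are only completed here in MatAssemblyEnd(), a caller
   may overlap independent computation between the two assembly phases.  A
   sketch of the intended pattern:

       ierr = MatAssemblyBegin(mat,FINAL_ASSEMBLY); CHKERRQ(ierr);
       ... independent work that does not touch mat ...
       ierr = MatAssemblyEnd(mat,FINAL_ASSEMBLY); CHKERRQ(ierr);
*/
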
static int MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  return MatZeroEntries(l->A);
}

/* The code below does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   the local matrix l->A directly rather than going through the
   MatZeroRows() routine.
*/
static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /* see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp); CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}

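/*
   A usage sketch for MatZeroRows() on this format (not from the original
   source): each processor may list global rows it does not own, since the
   routine above routes the indices to their owners.  Passing a diag pointer
   places that value on the diagonal of each zeroed row; passing PETSC_NULL
   leaves the diagonal zero.  The row indices below are hypothetical.

       IS     is;
       int    ierr, rows[2] = {0, 5};
       Scalar one = 1.0;
       ierr = ISCreateSeq(MPI_COMM_SELF,2,rows,&is); CHKERRQ(ierr);
       ierr = MatZeroRows(mat,is,&one); CHKERRQ(ierr);
       ierr = ISDestroy(is); CHKERRQ(ierr);
*/
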
static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
  CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
  CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
  return 0;
}

static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
  return 0;
}

static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;
  Scalar       zero = 0.0;

  ierr = VecSet(&zero,yy); CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
         (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
         (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  return 0;
}

static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  ierr = VecCopy(yy,zz); CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
         (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
         (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  return 0;
}

static int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
  int          ierr, len, i, n, m = a->m, radd;
  Scalar       *x, zero = 0.0;

  VecSet(&zero,v);
  ierr = VecGetArray(v,&x); CHKERRQ(ierr);
  ierr = VecGetSize(v,&n); CHKERRQ(ierr);
  if (n != a->M) SETERRQ(1,"MatGetDiagonal_MPIDense:Nonconforming mat and vec");
  len = PetscMin(aloc->m,aloc->n);
  radd = a->rstart*m;
  for ( i=0; i<len; i++ ) {
    x[i] = aloc->v[radd + i*m + i];
  }
  return 0;
}

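/*
   The offset arithmetic above relies on the column-major (Fortran) layout
   of the local array aloc->v: the global diagonal entry (rstart+i, rstart+i)
   lives in local row i and global column rstart+i, i.e. at

       aloc->v[i + (rstart + i)*m] = aloc->v[rstart*m + i*m + i]

   which is radd + i*m + i with radd = rstart*m.
*/
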
static int MatDestroy_MPIDense(PetscObject obj)
{
  Mat          mat = (Mat) obj;
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  PetscFree(mdn->rowners);
  ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
  if (mdn->lvec)   VecDestroy(mdn->lvec);
  if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   PetscFree(mdn->factor->temp);
    if (mdn->factor->tag)    PetscFree(mdn->factor->tag);
    if (mdn->factor->pivots) PetscFree(mdn->factor->pivots);
    PetscFree(mdn->factor);
  }
  PetscFree(mdn);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}

#include "pinclude/pviewer.h"

static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported");
  return 0;
}

static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format;
  FILE         *fd;
  ViewerType   vtype;

  ViewerGetType(viewer,&vtype);
  ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
  ierr = ViewerGetFormat(viewer,&format); CHKERRQ(ierr);
  if (format == ASCII_FORMAT_INFO_DETAILED) {
    int nz, nzalloc, mem, rank;
    MPI_Comm_rank(mat->comm,&rank);
    ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
      fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",
              rank,mdn->m,nz,nzalloc,mem);
      fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
    ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
    return 0;
  }
  else if (format == ASCII_FORMAT_INFO) {
    return 0;
  }
  if (vtype == ASCII_FILE_VIEWER) {
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
            mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  }
  else {
    int size = mdn->size, rank = mdn->rank;
    if (size == 1) {
      ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto the first processor */
      Mat          A;
      int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
      Scalar       *vals;
      Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

      if (!rank) {
        ierr = MatCreateMPIDense(mat->comm,M,PETSC_DECIDE,M,N,PETSC_NULL,&A);
               CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIDense(mat->comm,0,PETSC_DECIDE,M,N,PETSC_NULL,&A);
               CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* Copy the matrix ... This isn't the most efficient means,
         but it's quick for now */
      row = mdn->rstart; m = Amdn->m;
      for ( i=0; i<m; i++ ) {
        ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
        ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        row++;
      }

      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

static int MatView_MPIDense(PetscObject obj,Viewer viewer)
{
  Mat          mat = (Mat) obj;
  int          ierr;
  ViewerType   vtype;

  if (!viewer) {
    viewer = STDOUT_VIEWER_SELF;
  }
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER) {
    ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == ASCII_FILES_VIEWER) {
    ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    return MatView_MPIDense_Binary(mat,viewer);
  }
  return 0;
}

static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,int *nz,
                               int *nzalloc,int *mem)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  Mat          mdn = mat->A;
  int          ierr, isend[3], irecv[3];

  ierr = MatGetInfo(mdn,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
  if (flag == MAT_LOCAL) {
    if (nz)      *nz      = isend[0];
    if (nzalloc) *nzalloc = isend[1];
    if (mem)     *mem     = isend[2];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,A->comm);
    if (nz)      *nz      = irecv[0];
    if (nzalloc) *nzalloc = irecv[1];
    if (mem)     *mem     = irecv[2];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,A->comm);
    if (nz)      *nz      = irecv[0];
    if (nzalloc) *nzalloc = irecv[1];
    if (mem)     *mem     = irecv[2];
  }
  return 0;
}

/* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
   extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
   extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
   extern int MatSolve_MPIDense(Mat,Vec,Vec);
   extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
   extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
   extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */

static int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  if (op == NO_NEW_NONZERO_LOCATIONS ||
      op == YES_NEW_NONZERO_LOCATIONS ||
      op == COLUMNS_SORTED ||
      op == ROW_ORIENTED) {
    MatSetOption(a->A,op);
  }
  else if (op == ROWS_SORTED ||
           op == SYMMETRIC_MATRIX ||
           op == STRUCTURALLY_SYMMETRIC_MATRIX ||
           op == YES_NEW_DIAGONALS) {
    PLogInfo(A,"Info:MatSetOption_MPIDense:Option ignored\n");
  }
  else if (op == COLUMN_ORIENTED) {
    a->roworiented = 0; MatSetOption(a->A,op);
  }
  else if (op == NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:NO_NEW_DIAGONALS");
  }
  else {
    SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");
  }
  return 0;
}

static int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->m; *n = mat->N;
  return 0;
}

static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  int          lrow, rstart = mat->rstart, rend = mat->rend;

  if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:only local rows")
  lrow = row - rstart;
  return MatGetRow(mat->A,lrow,nz,idx,v);
}

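/*
   Rows may only be extracted locally, and each MatGetRow() must be matched
   by a MatRestoreRow() to release the copied index and value arrays.  A
   sketch over the locally owned rows (not from the original source):

       int    ierr, row, rstart, rend, nz, *cols;
       Scalar *vals;
       ierr = MatGetOwnershipRange(mat,&rstart,&rend); CHKERRQ(ierr);
       for ( row=rstart; row<rend; row++ ) {
         ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
         ... use cols[0..nz-1] and vals[0..nz-1] ...
         ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
       }
*/
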
static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  if (idx) PetscFree(*idx);
  if (v) PetscFree(*v);
  return 0;
}

static int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr, i, j;
  double       sum = 0.0;
  Scalar       *v = mat->v;

  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for ( i=0; i<mat->n*mat->m; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    }
    else if (type == NORM_1) { /* max column sum */
      double *tmp, *tmp2;
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      PetscMemzero(tmp,2*mdn->N*sizeof(double));
      *norm = 0.0;
      v = mat->v;
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp);
      PLogFlops(mat->n*mat->m);
    }
    else if (type == NORM_INFINITY) { /* max row sum */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);
    }
    else {
      SETERRQ(1,"MatNorm_MPIDense:No support for two norm");
    }
  }
  return 0;
}

static int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
  Mat          B;
  int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
  int          j, i, ierr;
  Scalar       *v;

  if (matout == PETSC_NULL && M != N)
    SETERRQ(1,"MatTranspose_MPIDense:Supports square matrix only in-place");
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);
         CHKERRQ(ierr);

  m = Aloc->m; n = Aloc->n; v = Aloc->v;
  /* rwork holds the global row indices of the local rows; it is the same
     for every column, so fill it once (note it needs m, not n, entries) */
  rwork = (int *) PetscMalloc(m*sizeof(int)); CHKPTRQ(rwork);
  for ( i=0; i<m; i++ ) rwork[i] = rstart + i;
  for ( j=0; j<n; j++ ) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
    v += m;
  }
  PetscFree(rwork);
  ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose; we free the data structures
       of a and steal those of B */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

#include "pinclude/plapack.h"
static int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
  Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
  int          one = 1, nz;

  nz = a->m*a->n;
  BLscal_( &nz, alpha, a->v, &one );
  PLogFlops(nz);
  return 0;
}

static int MatConvertSameType_MPIDense(Mat,Mat *,int);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,MatRestoreRow_MPIDense,
       MatMult_MPIDense,MatMultAdd_MPIDense,
       MatMultTrans_MPIDense,MatMultTransAdd_MPIDense,
/*     MatSolve_MPIDense,0, */
       0,0,
       0,0,
       0,0,
/*     MatLUFactor_MPIDense,0, */
       0,MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,0,MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
       0,0,0,
/*     0,MatLUFactorSymbolic_MPIDense,MatLUFactorNumeric_MPIDense, */
       0,0,
       MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,0, MatGetArray_MPIDense, MatRestoreArray_MPIDense,
       0,0,0,MatConvertSameType_MPIDense,
       0,0,0,0,0,
       0,0,MatGetValues_MPIDense,0,0,MatScale_MPIDense};

/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated
           if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated
           if n is given)
.  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Currently, the only parallel dense matrix decomposition is by rows,
   so that n=N and each submatrix owns all of the global columns.

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i, flg;

  /* Note:  For now, when data is specified above, this assumes that the user
     correctly allocates the local dense storage space.  We should add error
     checking. */

  *A = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm);
  PLogObjectCreate(mat);
  mat->data       = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy    = MatDestroy_MPIDense;
  mat->view       = MatView_MPIDense;
  mat->factor     = 0;

  a->factor     = 0;
  a->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}

  /* each row stores all columns */
  if (N == PETSC_DECIDE) N = n;
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  /* if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported"); */
  a->N = mat->N = N;
  a->M = mat->M = M;
  a->m = mat->m = m;
  a->n = mat->n = n;

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+
                       sizeof(Mat_MPIDense));
  MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }

  ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off-processor entries */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);

  /* stuff used for matrix-vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = 1;

  *A = mat;
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  return 0;
}

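/*
   A minimal creation sketch matching the man page above: let PETSc choose
   the row decomposition for a global M-by-N matrix and manage the storage
   (M and N below stand for the caller's global dimensions).

       Mat mat;
       int ierr;
       ierr = MatCreateMPIDense(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
                                M,N,PETSC_NULL,&mat); CHKERRQ(ierr);
       ... MatSetValues() and assembly ...
       ierr = MatDestroy(mat); CHKERRQ(ierr);
*/
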
static int MatConvertSameType_MPIDense(Mat A,Mat *newmat,int cpvalues)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
  int          ierr;
  FactorCtx    *factor;

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm);
  PLogObjectCreate(mat);
  mat->data      = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIDense;
  mat->view      = MatView_MPIDense;
  mat->factor    = A->factor;
  mat->assembled = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart     = oldmat->rstart;
  a->rend       = oldmat->rend;
  a->size       = oldmat->size;
  a->rank       = oldmat->rank;
  a->insertmode = NOT_SET_VALUES;

  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  return 0;
}

#include "sys.h"

int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
{
  int        *rowners, i,size,rank,m,rstart,rend,ierr,nz,j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);

  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array); CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly
       into the column-major array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in parts belonging to other processors and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);
    }
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);

    /* insert into matrix by row (this is why we cannot read directly
       into the column-major array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  PetscFree(rowners);
  PetscFree(vals);
  ierr = MatAssemblyBegin(*newmat,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}

int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDense:not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    return MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);
  }

  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in everyone else's and ship them off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDense:something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix (the off-diagonal counts are not actually needed
     for the dense format) */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in parts belonging to other processors and ship them out */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDense:something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
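
/*
   A loading sketch (not from the original source), assuming a matrix written
   earlier with MatView() on a binary viewer and the binary-viewer interface
   of this PETSc version; the file name is hypothetical.

       Viewer viewer;
       Mat    mat;
       int    ierr;
       ierr = ViewerFileOpenBinary(MPI_COMM_WORLD,"matrix.dat",BINARY_RDONLY,
                                   &viewer); CHKERRQ(ierr);
       ierr = MatLoad(viewer,MATMPIDENSE,&mat); CHKERRQ(ierr);
       ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
*/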