#ifndef lint
static char vcid[] = "$Id: mpidense.c,v 1.57 1996/11/30 21:47:49 curfman Exp curfman $";
#endif

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
  int          roworiented = A->roworiented;

  if (A->insertmode != NOT_SET_VALUES && A->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds");
  }
  A->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row");
    if (idxm[i] >= A->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr);
      }
      else {
        for ( j=0; j<n; j++ ) {
          if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column");
          if (idxn[j] >= A->N) SETERRQ(1,"MatSetValues_MPIDense:Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
        }
      }
    }
    else {
      if (roworiented) {
        ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr);
      }
      else { /* must stash each entry separately */
        row = idxm[i];
        for ( j=0; j<n; j++ ) {
          ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    }
  }
  return 0;
}
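
/*
   Illustrative sketch (not part of the library): the two memory layouts
   of the values array v accepted by MatSetValues_MPIDense above.  Both
   calls below insert the same logical 2x2 block [1 2; 3 4]; only the
   ordering of v differs, matching the v+i*n (row-oriented) and v+i+j*m
   (column-oriented) indexing in the code.

     int    rows[2] = {0,1}, cols[2] = {0,1};
     Scalar vrow[4] = {1.0,2.0,3.0,4.0};    row oriented (the default)
     Scalar vcol[4] = {1.0,3.0,2.0,4.0};    column oriented
     MatSetValues(mat,2,rows,2,cols,vrow,INSERT_VALUES);
     MatSetOption(mat,MAT_COLUMN_ORIENTED);
     MatSetValues(mat,2,rows,2,cols,vcol,INSERT_VALUES);
*/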

static int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(1,"MatGetValues_MPIDense:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative column");
        if (idxn[j] >= mdn->N)
          SETERRQ(1,"MatGetValues_MPIDense:Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr);
      }
    }
    else {
      SETERRQ(1,"MatGetValues_MPIDense:Only local values currently supported");
    }
  }
  return 0;
}

static int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int ierr;

  ierr = MatGetArray(a->A,array); CHKERRQ(ierr);
  return 0;
}

static int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  /* nothing to do: MatGetArray_MPIDense handed out a direct pointer
     into the local dense storage */
  return 0;
}

static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
  int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int          tag = mat->tag, *owner,*starts,count,ierr;
  InsertMode   addv;
  MPI_Request  *send_waits,*recv_waits;
  Scalar       *rvalues,*svalues;

  /* make sure all processors are in the same insert mode: since
     NOT_SET_VALUES is zero, a bitwise OR across processors yields the
     common mode, and both bits set means inserts and adds were mixed */
  MPI_Allreduce(&mdn->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs");
  }
  mdn->insertmode = addv; /* in case this processor had no cache */

  /*  first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<mdn->stash.n; i++ ) {
    idx = mdn->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  if (nreceives > size) SETERRQ(1,"MatAssemblyBegin_MPIDense:Internal PETSc error");
  MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message consists of ordered triples
          (global row index, global column index, value); the indices are
          stored as Scalars to simplify the message passing.
       2) since we don't know how long each individual message is, we
          allocate the largest needed buffer for each receive; potentially
          this is a lot of wasted space.

     This could be done better.
  */
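
  /*
     Worked example of the packing (derived from the code below, for
     illustration): each stashed entry becomes three consecutive Scalars,

        svalues[3*k]   = (Scalar) global row index
        svalues[3*k+1] = (Scalar) global column index
        svalues[3*k+2] = numerical value

     so a message carrying the entries (2,5)=alpha and (7,0)=beta is
     { 2.0, 5.0, alpha, 7.0, 0.0, beta }; MatAssemblyEnd_MPIDense unpacks
     it three Scalars at a time.
  */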
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<mdn->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar)  mdn->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar)  mdn->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] =  mdn->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(mat,"MatAssemblyBegin_MPIDense:Number of off-processor values %d\n",mdn->stash.n);
  ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);

  mdn->svalues    = svalues;    mdn->rvalues = rvalues;
  mdn->nsends     = nsends;     mdn->nrecvs = nreceives;
  mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
  mdn->rmax       = nmax;

  return 0;
}
extern int MatSetUpMultiply_MPIDense(Mat);

static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Status   *send_status,recv_status;
  int          imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col;
  Scalar       *values,val;
  InsertMode   addv = mdn->insertmode;

  /*  wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = mdn->rvalues + 3*imdex*mdn->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - mdn->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= 0 && col < mdn->N) {
        MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
      }
      else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");}
    }
    count--;
  }
  PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues);

  /* wait on sends */
  if (mdn->nsends) {
    send_status = (MPI_Status *) PetscMalloc(mdn->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(mdn->send_waits); PetscFree(mdn->svalues);

  mdn->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
  }
  return 0;
}

static int MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  return MatZeroEntries(l->A);
}

static int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  *bs = 1;
  return 0;
}

/* The code does not handle the diagonal entries correctly unless the
   matrix is square and the row and column ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:   */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /* see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /*  wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}

static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
  return 0;
}

static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
  return 0;
}

static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;
  Scalar       zero = 0.0;

  ierr = VecSet(&zero,yy); CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  ierr = VecCopy(yy,zz); CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

static int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
  int          ierr, len, i, n, m = a->m, radd;
  Scalar       *x, zero = 0.0;

  VecSet(&zero,v);
  ierr = VecGetArray(v,&x); CHKERRQ(ierr);
  ierr = VecGetSize(v,&n); CHKERRQ(ierr);
  if (n != a->M) SETERRQ(1,"MatGetDiagonal_MPIDense:Nonconforming mat and vec");
  len = PetscMin(aloc->m,aloc->n);
  radd = a->rstart*m; /* the diagonal of local row i sits in global column rstart+i */
  for ( i=0; i<len; i++ ) {
    x[i] = aloc->v[radd + i*m + i];
  }
  return 0;
}

static int MatDestroy_MPIDense(PetscObject obj)
{
  Mat          mat = (Mat) obj;
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  PetscFree(mdn->rowners);
  ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
  if (mdn->lvec)   VecDestroy(mdn->lvec);
  if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   PetscFree(mdn->factor->temp);
    if (mdn->factor->tag)    PetscFree(mdn->factor->tag);
    if (mdn->factor->pivots) PetscFree(mdn->factor->pivots);
    PetscFree(mdn->factor);
  }
  PetscFree(mdn);
  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}

#include "pinclude/pviewer.h"

static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported");
  return 0;
}

static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format;
  FILE         *fd;
  ViewerType   vtype;

  ViewerGetType(viewer,&vtype);
  ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
  ierr = ViewerGetFormat(viewer,&format);
  if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
    int rank;
    MatInfo info;
    MPI_Comm_rank(mat->comm,&rank);
    ierr = MatGetInfo(mat,MAT_LOCAL,&info);
    PetscSequentialPhaseBegin(mat->comm,1);
      fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
         (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
    ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
    return 0;
  }
  else if (format == VIEWER_FORMAT_ASCII_INFO) {
    return 0;
  }
  if (vtype == ASCII_FILE_VIEWER) {
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
             mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  }
  else {
    int size = mdn->size, rank = mdn->rank;
    if (size == 1) {
      ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto the first processor */
      Mat          A;
      int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
      Scalar       *vals;
      Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

      if (!rank) {
        ierr = MatCreateMPIDense(mat->comm,M,M,N,N,PETSC_NULL,&A); CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIDense(mat->comm,0,M,N,N,PETSC_NULL,&A); CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* Copy the matrix ... This isn't the most efficient means,
         but it's quick for now */
      row = mdn->rstart; m = Amdn->m;
      for ( i=0; i<m; i++ ) {
        ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
        ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        row++;
      }

      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

static int MatView_MPIDense(PetscObject obj,Viewer viewer)
{
  Mat          mat = (Mat) obj;
  int          ierr;
  ViewerType   vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER) {
    ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    return MatView_MPIDense_Binary(mat,viewer);
  }
  return 0;
}

static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  Mat          mdn = mat->A;
  int          ierr;
  double       isend[5], irecv[5];

  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  return 0;
}

/* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
   extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
   extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
   extern int MatSolve_MPIDense(Mat,Vec,Vec);
   extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
   extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
   extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */

static int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_COLUMNS_UNSORTED) {
    MatSetOption(a->A,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS) {
    PLogInfo(A,"Info:MatSetOption_MPIDense:Option ignored\n");
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");
  }
  return 0;
}

static int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->m; *n = mat->N;
  return 0;
}

static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  int          lrow, rstart = mat->rstart, rend = mat->rend;

  if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:only local rows");
  lrow = row - rstart;
  return MatGetRow(mat->A,lrow,nz,idx,v);
}

static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  if (idx) PetscFree(*idx);
  if (v) PetscFree(*v);
  return 0;
}


static int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr, i, j;
  double       sum = 0.0;
  Scalar       *v = mat->v;

  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mat->n*mat->m; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    }
    else if (type == NORM_1) { /* max column sum */
      double *tmp, *tmp2;
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      PetscMemzero(tmp,2*mdn->N*sizeof(double));
      *norm = 0.0;
      v = mat->v;
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp);
      PLogFlops(mat->n*mat->m);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);
    }
    else {
      SETERRQ(1,"MatNorm_MPIDense:No support for the two norm");
    }
  }
  return 0;
}

static int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
  Mat          B;
  int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
  int          j, i, ierr;
  Scalar       *v;

  if (matout == PETSC_NULL && M != N) {
    SETERRQ(1,"MatTranspose_MPIDense:Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = Aloc->m; n = Aloc->n; v = Aloc->v;
  rwork = (int *) PetscMalloc(m*sizeof(int)); CHKPTRQ(rwork); /* one entry per local row */
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for ( j=0; j<n; j++ ) {
    /* column j of the local block becomes row j of the transpose */
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
    v   += m;
  }
  PetscFree(rwork);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose, but free the data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

#include "pinclude/plapack.h"
static int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
  Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
  int          one = 1, nz;

  nz = a->m*a->n;
  BLscal_( &nz, alpha, a->v, &one );
  PLogFlops(nz);
  return 0;
}

static int MatConvertSameType_MPIDense(Mat,Mat *,int);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,MatRestoreRow_MPIDense,
       MatMult_MPIDense,MatMultAdd_MPIDense,
       MatMultTrans_MPIDense,MatMultTransAdd_MPIDense,
/*       MatSolve_MPIDense,0, */
       0,0,
       0,0,
       0,0,
/*       MatLUFactor_MPIDense,0, */
       0,MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,0,MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
       0,0,
/*       0,MatLUFactorSymbolic_MPIDense,MatLUFactorNumeric_MPIDense, */
       0,0,
       MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,0, MatGetArray_MPIDense, MatRestoreArray_MPIDense,
       0,MatConvertSameType_MPIDense,
       0,0,0,0,0,
       0,0,MatGetValues_MPIDense,0,0,MatScale_MPIDense,
       0,0,0,MatGetBlockSize_MPIDense};

/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
.  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Currently, the only parallel dense matrix decomposition is by rows,
   so that n=N and each submatrix owns all of the global columns.
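
   Example (a minimal sketch setting the 8 x 8 identity; error checking
   with CHKERRQ omitted, sizes arbitrary):
$     Mat    B;
$     int    rstart, rend, i; Scalar one = 1.0;
$     MatCreateMPIDense(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,8,8,PETSC_NULL,&B);
$     MatGetOwnershipRange(B,&rstart,&rend);
$     for ( i=rstart; i<rend; i++ ) {
$       MatSetValues(B,1,&i,1,&i,&one,INSERT_VALUES);
$     }
$     MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
$     MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
$     MatDestroy(B);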

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i,flg;

  /* Note:  For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space.  We should add error checking. */

  *A = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm);
  PLogObjectCreate(mat);
  mat->data       = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy    = MatDestroy_MPIDense;
  mat->view       = MatView_MPIDense;
  mat->factor     = 0;
  mat->mapping    = 0;

  a->factor       = 0;
  a->insertmode   = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}

  /* each row stores all columns */
  if (N == PETSC_DECIDE) N = n;
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  /*  if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported"); */
  a->N = mat->N = N;
  a->M = mat->M = M;
  a->m = mat->m = m;
  a->n = mat->n = n;

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
  MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }
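
  /*
     Worked example (illustrative): on three processors with local row
     counts m = 3, 3, 2, the Allgather plus running sums above give
     rowners = {0, 3, 6, 8}; processor 1 then owns global rows
     [rowners[1], rowners[2]) = [3, 6).  cowners is built the same way
     from the local column counts.
  */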

  ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off-processor entries */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);

  /* stuff used for matrix-vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = 1;

  *A = mat;
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  return 0;
}

static int MatConvertSameType_MPIDense(Mat A,Mat *newmat,int cpvalues)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
  int          ierr;
  FactorCtx    *factor;

  *newmat       = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm);
  PLogObjectCreate(mat);
  mat->data      = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIDense;
  mat->view      = MatView_MPIDense;
  mat->factor    = A->factor;
  mat->assembled = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart     = oldmat->rstart;
  a->rend       = oldmat->rend;
  a->size       = oldmat->size;
  a->rank       = oldmat->rank;
  a->insertmode = NOT_SET_VALUES;

  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  return 0;
}

#include "sys.h"

int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
{
  int        *rowners, i,size,rank,m,rstart,rend,ierr,nz,j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);

  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array); CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into the matrix by row (this is why we cannot read directly into array) */
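    /*
       Worked example (illustrative): with m = 2 local rows and N = 3
       columns, the row-major file data {a,b,c,d,e,f} for the block
       [a b c; d e f] lands in the column-major local array as
       array = {a,d,b,e,c,f}, i.e. entry (i,j) goes to array[i + j*m].
    */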
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);
    }
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);

1030     vals_ptr = vals;
1031     for ( i=0; i<m; i++ ) {
1032       for ( j=0; j<N; j++ ) {
1033         array[i + j*m] = *vals_ptr++;
1034       }
1035     }
1036   }
1037   PetscFree(rowners);
1038   PetscFree(vals);
1039   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1040   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1041   return 0;
1042 }
1043 
1044 
int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDense:not a matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle the case where the matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    return MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);
  }

  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in everyone else's and ship them off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDense:something is wrong with the file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDense:something is wrong with the file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}