#ifndef lint
static char vcid[] = "$Id: mpidense.c,v 1.50 1996/10/01 03:34:29 bsmith Exp bsmith $";
#endif

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
  int          roworiented = A->roworiented;

  if (A->insertmode != NOT_SET_VALUES && A->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds");
  }
  A->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row");
    if (idxm[i] >= A->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr);
      }
      else {
        for ( j=0; j<n; j++ ) {
          if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column");
          if (idxn[j] >= A->N) SETERRQ(1,"MatSetValues_MPIDense:Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
        }
      }
    }
    else {
      if (roworiented) {
        ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr);
      }
      else { /* must stash each separately */
        row = idxm[i];
        for ( j=0; j<n; j++ ) {
          ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    }
  }
  return 0;
}

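/*
   Usage sketch (not part of the original source): rows owned by this
   process go straight into the local dense block above, while rows owned
   elsewhere are stashed and shipped at assembly time.  A minimal caller,
   assuming an MPIDense matrix mat has already been created:

     int    rows[2] = {0,1}, cols[2] = {0,1}, ierr;
     Scalar vals[4] = {1.0,2.0,3.0,4.0};        (hypothetical data)
     ierr = MatSetValues(mat,2,rows,2,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);

   Any process may set any global entry, but INSERT_VALUES and ADD_VALUES
   cannot be mixed between assemblies, as the check above enforces.
*/
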
static int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(1,"MatGetValues_MPIDense:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative column");
        if (idxn[j] >= mdn->N)
          SETERRQ(1,"MatGetValues_MPIDense:Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr);
      }
    }
    else {
      SETERRQ(1,"MatGetValues_MPIDense:Only local values currently supported");
    }
  }
  return 0;
}

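/*
   Usage sketch (not part of the original source): unlike MatSetValues(),
   retrieval is purely local, so callers should stay inside their own
   ownership range, e.g. for an assembled MPIDense matrix mat:

     int    rstart, rend, row, col = 0, ierr;
     Scalar val;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend); CHKERRQ(ierr);
     row  = rstart;                      (first locally owned row)
     ierr = MatGetValues(mat,1,&row,1,&col,&val); CHKERRQ(ierr);
*/
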
static int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int ierr;

  ierr = MatGetArray(a->A,array); CHKERRQ(ierr);
  return 0;
}

static int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  return 0;
}

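/*
   Usage sketch (not part of the original source): the array returned
   above is the local dense block, stored by columns (Fortran order) with
   leading dimension m, the number of local rows; entry (i,j) of the local
   block lives at array[i + j*m]:

     Scalar *array;
     int    ierr, i = 0, j = 0, m, n;
     ierr = MatGetLocalSize(mat,&m,&n); CHKERRQ(ierr);
     ierr = MatGetArray(mat,&array); CHKERRQ(ierr);
     array[i + j*m] = 5.0;               (local row i, global column j)
     ierr = MatRestoreArray(mat,&array); CHKERRQ(ierr);
*/
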
static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
  int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int          tag = mat->tag, *owner,*starts,count,ierr;
  InsertMode   addv;
  MPI_Request  *send_waits,*recv_waits;
  Scalar       *rvalues,*svalues;

  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
  MPI_Allreduce(&mdn->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs");
  }
  mdn->insertmode = addv; /* in case this processor had no cache */

  /*  first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<mdn->stash.n; i++ ) {
    idx = mdn->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  if (nreceives > size) SETERRQ(1,"MatAssemblyBegin_MPIDense:Internal PETSc error");
  MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered triples
     (global row index, global column index, value); we store the indices
     as Scalars to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.

       This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<mdn->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar)  mdn->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar)  mdn->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] =  mdn->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);

  mdn->svalues    = svalues;    mdn->rvalues = rvalues;
  mdn->nsends     = nsends;     mdn->nrecvs = nreceives;
  mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
  mdn->rmax       = nmax;

  return 0;
}
extern int MatSetUpMultiply_MPIDense(Mat);

static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Status   *send_status,recv_status;
  int          imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col;
  Scalar       *values,val;
  InsertMode   addv = mdn->insertmode;

  /*  wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = mdn->rvalues + 3*imdex*mdn->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - mdn->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= 0 && col < mdn->N) {
        MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
      }
      else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");}
    }
    count--;
  }
  PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues);

  /* wait on sends */
  if (mdn->nsends) {
    send_status = (MPI_Status *) PetscMalloc(mdn->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(mdn->send_waits); PetscFree(mdn->svalues);

  mdn->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
  }
  return 0;
}

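/*
   Usage sketch (not part of the original source): a flush assembly lets a
   caller switch between ADD_VALUES and INSERT_VALUES without finalizing
   the matrix (assuming int i,j and Scalar v1,v2 are in scope):

     ierr = MatSetValues(mat,1,&i,1,&j,&v1,ADD_VALUES); CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FLUSH_ASSEMBLY); CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FLUSH_ASSEMBLY); CHKERRQ(ierr);
     ierr = MatSetValues(mat,1,&i,1,&j,&v2,INSERT_VALUES); CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);

   Only MAT_FINAL_ASSEMBLY on a first assembly triggers
   MatSetUpMultiply_MPIDense() above.
*/
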
static int MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  return MatZeroEntries(l->A);
}

static int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  *bs = 1;
  return 0;
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   the underlying local matrix (l->A) directly and not go through
   the MatZeroRows() routine.
*/
static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:   */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /* see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /*  wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}

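/*
   Usage sketch (not part of the original source): callers pass an index
   set of global rows (which need not be locally owned) plus an optional
   value to place on the diagonal of each zeroed row:

     IS     is;
     int    rows[1], ierr;
     Scalar one = 1.0;
     rows[0] = 0;                        (global row to zero)
     ierr = ISCreateGeneral(MPI_COMM_SELF,1,rows,&is); CHKERRQ(ierr);
     ierr = MatZeroRows(A,is,&one); CHKERRQ(ierr);
     ierr = ISDestroy(is); CHKERRQ(ierr);

   Passing a null pointer instead of &one leaves the zeroed rows with a
   zero diagonal; see the BUG note above about non-square decompositions.
*/
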
static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
  return 0;
}

static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
  return 0;
}

static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;
  Scalar       zero = 0.0;

  ierr = VecSet(&zero,yy); CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  ierr = VecCopy(yy,zz); CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

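/*
   Usage sketch (not part of the original source): for an M x N MPIDense
   matrix, MatMult() wants x of global length N and y of global length M
   (the transpose versions swap the two):

     Vec x, y;
     int ierr;
     ierr = VecCreateMPI(comm,PETSC_DECIDE,N,&x); CHKERRQ(ierr);
     ierr = VecCreateMPI(comm,m,M,&y); CHKERRQ(ierr);    (m = local rows)
     ierr = MatMult(A,x,y); CHKERRQ(ierr);

   The scatter context Mvctx gathers the needed pieces of x into the
   local work vector lvec before the sequential kernel runs.
*/
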
static int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
  int          ierr, len, i, n, m = a->m, radd;
  Scalar       *x, zero = 0.0;

  VecSet(&zero,v);
  ierr = VecGetArray(v,&x); CHKERRQ(ierr);
  ierr = VecGetSize(v,&n); CHKERRQ(ierr);
  if (n != a->M) SETERRQ(1,"MatGetDiagonal_MPIDense:Nonconforming mat and vec");
  len = PetscMin(aloc->m,aloc->n);
  radd = a->rstart*m;
  for ( i=0; i<len; i++ ) {
    x[i] = aloc->v[radd + i*m + i];
  }
  return 0;
}

static int MatDestroy_MPIDense(PetscObject obj)
{
  Mat          mat = (Mat) obj;
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  PetscFree(mdn->rowners);
  ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
  if (mdn->lvec)   VecDestroy(mdn->lvec);
  if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   PetscFree(mdn->factor->temp);
    if (mdn->factor->tag)    PetscFree(mdn->factor->tag);
    if (mdn->factor->pivots) PetscFree(mdn->factor->pivots);
    PetscFree(mdn->factor);
  }
  PetscFree(mdn);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}

#include "pinclude/pviewer.h"

static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported");
  return 0;
}

static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format;
  FILE         *fd;
  ViewerType   vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
  ierr = ViewerGetFormat(viewer,&format); CHKERRQ(ierr);
  if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
    int rank;
    MatInfo info;
    MPI_Comm_rank(mat->comm,&rank);
    ierr = MatGetInfo(mat,MAT_LOCAL,&info); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
      fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
         (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
    ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
    return 0;
  }
  else if (format == VIEWER_FORMAT_ASCII_INFO) {
    return 0;
  }
  if (vtype == ASCII_FILE_VIEWER) {
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
             mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  }
  else {
    int size = mdn->size, rank = mdn->rank;
    if (size == 1) {
      ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat          A;
      int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
      Scalar       *vals;
      Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

      if (!rank) {
        ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* Copy the matrix ... This isn't the most efficient means,
         but it's quick for now */
      row = mdn->rstart; m = Amdn->m;
      for ( i=0; i<m; i++ ) {
        ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
        ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        row++;
      }

      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

static int MatView_MPIDense(PetscObject obj,Viewer viewer)
{
  Mat          mat = (Mat) obj;
  int          ierr;
  ViewerType   vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER) {
    ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    return MatView_MPIDense_Binary(mat,viewer);
  }
  return 0;
}

static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  Mat          mdn = mat->A;
  int          ierr;
  double       isend[5], irecv[5];

  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  return 0;
}

/* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
   extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
   extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
   extern int MatSolve_MPIDense(Mat,Vec,Vec);
   extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
   extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
   extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */

static int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_ROW_ORIENTED) {
    MatSetOption(a->A,op);
  }
  else if (op == MAT_ROWS_SORTED ||
           op == MAT_SYMMETRIC ||
           op == MAT_STRUCTURALLY_SYMMETRIC ||
           op == MAT_YES_NEW_DIAGONALS) {
    PLogInfo(A,"Info:MatSetOption_MPIDense:Option ignored\n");
  }
  else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0; MatSetOption(a->A,op);
  }
  else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:MAT_NO_NEW_DIAGONALS");
  }
  else {
    SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");
  }
  return 0;
}

static int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->m; *n = mat->N;
  return 0;
}

static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

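/*
   Usage sketch (not part of the original source): the ownership range is
   the standard way to restrict a loop to locally stored rows:

     int ierr, row, rstart, rend;
     ierr = MatGetOwnershipRange(A,&rstart,&rend); CHKERRQ(ierr);
     for ( row=rstart; row<rend; row++ ) {
       ... MatGetRow()/MatGetValues() on row are purely local here ...
     }
*/
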
static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  int          lrow, rstart = mat->rstart, rend = mat->rend;

  if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:only local rows");
  lrow = row - rstart;
  return MatGetRow(mat->A,lrow,nz,idx,v);
}

static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  if (idx) PetscFree(*idx);
  if (v) PetscFree(*v);
  return 0;
}

static int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr, i, j;
  double       sum = 0.0;
  Scalar       *v = mat->v;

  if (mdn->size == 1) {
    ierr =  MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for ( i=0; i<mat->n*mat->m; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    }
    else if (type == NORM_1) {
      double *tmp, *tmp2;
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      PetscMemzero(tmp,2*mdn->N*sizeof(double));
      *norm = 0.0;
      v = mat->v;
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp);
      PLogFlops(mat->n*mat->m);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);
    }
    else {
      SETERRQ(1,"MatNorm_MPIDense:No support for two norm");
    }
  }
  return 0;
}

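/*
   Usage sketch (not part of the original source): with the row
   decomposition above, NORM_FROBENIUS reduces a local sum of squares,
   NORM_1 reduces per-column sums, and NORM_INFINITY reduces the local
   row-norm maxima:

     double nrm;
     int    ierr;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm); CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_INFINITY,&nrm); CHKERRQ(ierr);

   NORM_2 is not supported in parallel, as the error branch above shows.
*/
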
static int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
  Mat          B;
  int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
  int          j, i, ierr;
  Scalar       *v;

  if (matout == PETSC_NULL && M != N) {
    SETERRQ(1,"MatTranspose_MPIDense:Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = Aloc->m; n = Aloc->n; v = Aloc->v;
  rwork = (int *) PetscMalloc((m+1)*sizeof(int)); CHKPTRQ(rwork);
  for ( i=0; i<m; i++ ) rwork[i] = rstart + i;
  for ( j=0; j<n; j++ ) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
    v   += m;
  }
  PetscFree(rwork);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose, but free data struct from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

#include "pinclude/plapack.h"
static int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
  Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
  int          one = 1, nz;

  nz = a->m*a->n;
  BLscal_( &nz, alpha, a->v, &one );
  PLogFlops(nz);
  return 0;
}

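/*
   Usage sketch (not part of the original source): this generation of the
   API passes the scale factor by pointer, so A <- 2A reads

     Scalar two = 2.0;
     int    ierr;
     ierr = MatScale(&two,A); CHKERRQ(ierr);

   The work is a single BLAS scal call on the local dense block, so no
   communication is required.
*/
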
static int MatConvertSameType_MPIDense(Mat,Mat *,int);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,MatRestoreRow_MPIDense,
       MatMult_MPIDense,MatMultAdd_MPIDense,
       MatMultTrans_MPIDense,MatMultTransAdd_MPIDense,
/*       MatSolve_MPIDense,0, */
       0,0,
       0,0,
       0,0,
/*       MatLUFactor_MPIDense,0, */
       0,MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,0,MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
       0,0,
/*       0,MatLUFactorSymbolic_MPIDense,MatLUFactorNumeric_MPIDense, */
       0,0,
       MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,0, MatGetArray_MPIDense, MatRestoreArray_MPIDense,
       0,MatConvertSameType_MPIDense,
       0,0,0,0,0,
       0,0,MatGetValues_MPIDense,0,0,MatScale_MPIDense,
       0,0,0,MatGetBlockSize_MPIDense};

/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated
           if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated
           if n is given)
.  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Currently, the only parallel dense matrix decomposition is by rows,
   so that n=N and each submatrix owns all of the global columns.

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i,flg;

  /* Note:  For now, when data is specified above, this assumes the user correctly
   allocates the local dense storage space.  We should add error checking. */

  *A = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm);
  PLogObjectCreate(mat);
  mat->data       = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy    = MatDestroy_MPIDense;
  mat->view       = MatView_MPIDense;
  mat->factor     = 0;

  a->factor     = 0;
  a->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}

  /* each row stores all columns */
  if (N == PETSC_DECIDE) N = n;
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  /*  if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported"); */
  a->N = mat->N = N;
  a->M = mat->M = M;
  a->m = mat->m = m;
  a->n = mat->n = n;

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
  MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }

  ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off-processor entries */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = 1;

  *A = mat;
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  return 0;
}

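/*
   Usage sketch (not part of the original source): either the local or the
   global dimensions may be left to PETSc; for example, a matrix with 100
   global rows and columns whose row distribution PETSc chooses:

     Mat A;
     int ierr;
     ierr = MatCreateMPIDense(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
                              100,100,PETSC_NULL,&A); CHKERRQ(ierr);
     ... set values, assemble, use ...
     ierr = MatDestroy(A); CHKERRQ(ierr);

   Because the decomposition is by rows, each process stores all 100
   columns of its row slice in a single sequential dense block.
*/
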
static int MatConvertSameType_MPIDense(Mat A,Mat *newmat,int cpvalues)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
  int          ierr;
  FactorCtx    *factor;

  *newmat       = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm);
  PLogObjectCreate(mat);
  mat->data      = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIDense;
  mat->view      = MatView_MPIDense;
  mat->factor    = A->factor;
  mat->assembled = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart     = oldmat->rstart;
  a->rend       = oldmat->rend;
  a->size       = oldmat->size;
  a->rank       = oldmat->rank;
  a->insertmode = NOT_SET_VALUES;

  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);

  ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  return 0;
}

#include "sys.h"

int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
{
  int        *rowners, i,size,rank,m,rstart,rend,ierr,nz,j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);

  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array); CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);
    }
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);

    /* insert into matrix by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  PetscFree(rowners);
  PetscFree(vals);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}

int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDense:not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    return MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);
  }

  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDense:something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDense:something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
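
/*
   Usage sketch (not part of the original source): loading expects a PETSc
   binary viewer; the exact viewer-creation call below is an assumption
   and may differ between PETSc versions:

     Viewer v;
     Mat    A;
     int    ierr;
     ierr = ViewerFileOpenBinary(MPI_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&v);CHKERRQ(ierr);
     ierr = MatLoad(v,MATMPIDENSE,&A); CHKERRQ(ierr);
     ierr = ViewerDestroy(v); CHKERRQ(ierr);

   Files whose header stores nz == MATRIX_BINARY_FORMAT_DENSE take the
   dense fast path through MatLoad_MPIDense_DenseInFile() above.
*/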