1 #ifndef lint
2 static char vcid[] = "$Id: mpidense.c,v 1.46 1996/08/15 12:47:17 bsmith Exp bsmith $";
3 #endif
4 
5 /*
6    Basic functions for parallel dense matrices.
7 */
8 
9 #include "src/mat/impls/dense/mpi/mpidense.h"
10 #include "src/vec/vecimpl.h"
11 
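/*
   Entries whose rows are owned locally go straight into the local
   SeqDense block; entries for rows owned by other processes are stashed
   and communicated during MatAssemblyBegin()/MatAssemblyEnd().
*/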
12 static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
13 {
14   Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
15   int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
16   int          roworiented = A->roworiented;
17 
18   if (A->insertmode != NOT_SET_VALUES && A->insertmode != addv) {
19     SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds");
20   }
21   A->insertmode = addv;
22   for ( i=0; i<m; i++ ) {
23     if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row");
24     if (idxm[i] >= A->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large");
25     if (idxm[i] >= rstart && idxm[i] < rend) {
26       row = idxm[i] - rstart;
27       if (roworiented) {
28         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr);
29       }
30       else {
31         for ( j=0; j<n; j++ ) {
32           if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column");
33           if (idxn[j] >= A->N) SETERRQ(1,"MatSetValues_MPIDense:Column too large");
34           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
35         }
36       }
37     }
38     else {
39       if (roworiented) {
40         ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr);
41       }
42       else { /* must stash each separately */
43         row = idxm[i];
44         for ( j=0; j<n; j++ ) {
45           ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
46         }
47       }
48     }
49   }
50   return 0;
51 }
52 
53 static int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
54 {
55   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
56   int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;
57 
58   for ( i=0; i<m; i++ ) {
59     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative row");
60     if (idxm[i] >= mdn->M) SETERRQ(1,"MatGetValues_MPIDense:Row too large");
61     if (idxm[i] >= rstart && idxm[i] < rend) {
62       row = idxm[i] - rstart;
63       for ( j=0; j<n; j++ ) {
64         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative column");
65         if (idxn[j] >= mdn->N)
66           SETERRQ(1,"MatGetValues_MPIDense:Column too large");
67         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr);
68       }
69     }
70     else {
71       SETERRQ(1,"MatGetValues_MPIDense:Only local values currently supported");
72     }
73   }
74   return 0;
75 }
76 
77 static int MatGetArray_MPIDense(Mat A,Scalar **array)
78 {
79   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
80   int ierr;
81 
82   ierr = MatGetArray(a->A,array); CHKERRQ(ierr);
83   return 0;
84 }
85 
86 static int MatRestoreArray_MPIDense(Mat A,Scalar **array)
87 {
88   return 0;
89 }
90 
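/*
   Assembly, phase 1: count how many stashed entries are destined for each
   process, post the receives, and send each stashed (row,column,value)
   triple to the process that owns the row.
*/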
91 static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
92 {
93   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
94   MPI_Comm     comm = mat->comm;
95   int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
96   int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
97   int          tag = mat->tag, *owner,*starts,count,ierr;
98   InsertMode   addv;
99   MPI_Request  *send_waits,*recv_waits;
100   Scalar       *rvalues,*svalues;
101 
102   /* make sure all processors are using the same insert mode, INSERT_VALUES or ADD_VALUES */
103   MPI_Allreduce(&mdn->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
104   if (addv == (ADD_VALUES|INSERT_VALUES)) {
105     SETERRQ(1,"MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs");
106   }
107   mdn->insertmode = addv; /* in case this processor had no cache */
108 
109   /*  first count number of contributors to each processor */
110   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
111   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
112   owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
113   for ( i=0; i<mdn->stash.n; i++ ) {
114     idx = mdn->stash.idx[i];
115     for ( j=0; j<size; j++ ) {
116       if (idx >= owners[j] && idx < owners[j+1]) {
117         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
118       }
119     }
120   }
121   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
122 
123   /* inform other processors of number of messages and max length*/
124   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
125   MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);
126   nreceives = work[rank];
127   MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);
128   nmax = work[rank];
129   PetscFree(work);
130 
131   /* post receives:
132        1) each message consists of ordered triples
133      (global row index, global column index, value); the indices are
134      stored as Scalars to simplify the message passing.
135        2) since we don't know how long each individual message is, we
136      allocate the largest needed buffer for each receive. Potentially
137      this is a lot of wasted space.
138 
139        This could be done better.
140   */
141   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
142   CHKPTRQ(rvalues);
143   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
144   CHKPTRQ(recv_waits);
145   for ( i=0; i<nreceives; i++ ) {
146     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
147   }
148 
149   /* do sends:
150       1) starts[i] gives the starting index in svalues for stuff going to
151          the ith processor
152   */
153   svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar) );
154   CHKPTRQ(svalues);
155   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
156   CHKPTRQ(send_waits);
157   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
158   starts[0] = 0;
159   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
160   for ( i=0; i<mdn->stash.n; i++ ) {
161     svalues[3*starts[owner[i]]]       = (Scalar)  mdn->stash.idx[i];
162     svalues[3*starts[owner[i]]+1]     = (Scalar)  mdn->stash.idy[i];
163     svalues[3*(starts[owner[i]]++)+2] =  mdn->stash.array[i];
164   }
165   PetscFree(owner);
166   starts[0] = 0;
167   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
168   count = 0;
169   for ( i=0; i<size; i++ ) {
170     if (procs[i]) {
171       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);
172     }
173   }
174   PetscFree(starts); PetscFree(nprocs);
175 
176   /* Free cache space */
177   ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);
178 
179   mdn->svalues    = svalues;    mdn->rvalues = rvalues;
180   mdn->nsends     = nsends;     mdn->nrecvs = nreceives;
181   mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
182   mdn->rmax       = nmax;
183 
184   return 0;
185 }
186 extern int MatSetUpMultiply_MPIDense(Mat);
187 
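/*
   Assembly, phase 2: unpack the received triples into the local block,
   wait on the outstanding sends, assemble the local SeqDense matrix, and
   on the first final assembly set up the scatter used by MatMult (via
   MatSetUpMultiply_MPIDense).
*/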
188 static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
189 {
190   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
191   MPI_Status   *send_status,recv_status;
192   int          imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col;
193   Scalar       *values,val;
194   InsertMode   addv = mdn->insertmode;
195 
196   /*  wait on receives */
197   while (count) {
198     MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);
199     /* unpack receives into our local space */
200     values = mdn->rvalues + 3*imdex*mdn->rmax;
201     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
202     n = n/3;
203     for ( i=0; i<n; i++ ) {
204       row = (int) PetscReal(values[3*i]) - mdn->rstart;
205       col = (int) PetscReal(values[3*i+1]);
206       val = values[3*i+2];
207       if (col >= 0 && col < mdn->N) {
208         MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
209       }
210       else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");}
211     }
212     count--;
213   }
214   PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues);
215 
216   /* wait on sends */
217   if (mdn->nsends) {
218     send_status = (MPI_Status *) PetscMalloc(mdn->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
219     MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);
220     PetscFree(send_status);
221   }
222   PetscFree(mdn->send_waits); PetscFree(mdn->svalues);
223 
224   mdn->insertmode = NOT_SET_VALUES;
225   ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
226   ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);
227 
228   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
229     ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
230   }
231   return 0;
232 }
233 
234 static int MatZeroEntries_MPIDense(Mat A)
235 {
236   Mat_MPIDense *l = (Mat_MPIDense *) A->data;
237   return MatZeroEntries(l->A);
238 }
239 
240 /* the code does not do the diagonal entries correctly unless the
241    matrix is square and the column and row ownerships are identical.
242    This is a BUG. The only way to fix it seems to be to access
243    the local dense matrix mdn->A directly and not through the
244    MatZeroRows() routine.
245 */
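/*
   Each process ships the global indices of the rows it wants zeroed to
   the processes that own them; each owner then zeros its local rows
   (and sets the diagonal when diag is provided) by calling MatZeroRows()
   on the local SeqDense block.
*/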
246 static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
247 {
248   Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
249   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
250   int            *procs,*nprocs,j,found,idx,nsends,*work;
251   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
252   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
253   int            *lens,imdex,*lrows,*values;
254   MPI_Comm       comm = A->comm;
255   MPI_Request    *send_waits,*recv_waits;
256   MPI_Status     recv_status,*send_status;
257   IS             istmp;
258 
259   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
260   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
261 
262   /*  first count number of contributors to each processor */
263   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
264   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
265   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
266   for ( i=0; i<N; i++ ) {
267     idx = rows[i];
268     found = 0;
269     for ( j=0; j<size; j++ ) {
270       if (idx >= owners[j] && idx < owners[j+1]) {
271         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
272       }
273     }
274     if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range");
275   }
276   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
277 
278   /* inform other processors of number of messages and max length*/
279   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
280   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
281   nrecvs = work[rank];
282   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
283   nmax = work[rank];
284   PetscFree(work);
285 
286   /* post receives:   */
287   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
288   CHKPTRQ(rvalues);
289   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
290   CHKPTRQ(recv_waits);
291   for ( i=0; i<nrecvs; i++ ) {
292     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
293   }
294 
295   /* do sends:
296       1) starts[i] gives the starting index in svalues for stuff going to
297          the ith processor
298   */
299   svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
300   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
301   starts     = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
302   starts[0]  = 0;
303   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
304   for ( i=0; i<N; i++ ) {
305     svalues[starts[owner[i]]++] = rows[i];
306   }
307   ISRestoreIndices(is,&rows);
308 
309   starts[0] = 0;
310   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
311   count = 0;
312   for ( i=0; i<size; i++ ) {
313     if (procs[i]) {
314       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
315     }
316   }
317   PetscFree(starts);
318 
319   base = owners[rank];
320 
321   /*  wait on receives */
322   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
323   source = lens + nrecvs;
324   count  = nrecvs; slen = 0;
325   while (count) {
326     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
327     /* unpack receives into our local space */
328     MPI_Get_count(&recv_status,MPI_INT,&n);
329     source[imdex]  = recv_status.MPI_SOURCE;
330     lens[imdex]  = n;
331     slen += n;
332     count--;
333   }
334   PetscFree(recv_waits);
335 
336   /* copy the received global row indices into lrows[], converting them to local numbering */
337   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
338   count = 0;
339   for ( i=0; i<nrecvs; i++ ) {
340     values = rvalues + i*nmax;
341     for ( j=0; j<lens[i]; j++ ) {
342       lrows[count++] = values[j] - base;
343     }
344   }
345   PetscFree(rvalues); PetscFree(lens);
346   PetscFree(owner); PetscFree(nprocs);
347 
348   /* actually zap the local rows */
349   ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
350   PLogObjectParent(A,istmp);
351   PetscFree(lrows);
352   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
353   ierr = ISDestroy(istmp); CHKERRQ(ierr);
354 
355   /* wait on sends */
356   if (nsends) {
357     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
358     MPI_Waitall(nsends,send_waits,send_status);
359     PetscFree(send_status);
360   }
361   PetscFree(send_waits); PetscFree(svalues);
362 
363   return 0;
364 }
365 
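/*
   y = A*x: gather the entire input vector into the local work vector
   lvec (every process stores all N columns), then multiply by the local
   m x N block.
*/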
366 static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
367 {
368   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
369   int          ierr;
370 
371   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);CHKERRQ(ierr);
372   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);CHKERRQ(ierr);
373   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
374   return 0;
375 }
376 
377 static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
378 {
379   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
380   int          ierr;
381 
382   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);CHKERRQ(ierr);
383   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);CHKERRQ(ierr);
384   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
385   return 0;
386 }
387 
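/*
   y = A'*x: multiply by the transpose of the local block to obtain a full
   length-N partial result in lvec, then scatter-add the partial results
   from all processes into y.
*/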
388 static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
389 {
390   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
391   int          ierr;
392   Scalar       zero = 0.0;
393 
394   ierr = VecSet(&zero,yy); CHKERRQ(ierr);
395   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
396   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
397   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
398   return 0;
399 }
400 
401 static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
402 {
403   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
404   int          ierr;
405 
406   ierr = VecCopy(yy,zz); CHKERRQ(ierr);
407   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
408   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
409   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
410   return 0;
411 }
412 
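/*
   The local block is stored in column-major order with m (= local rows)
   rows, so the diagonal entry for global row rstart+i is located at
   aloc->v[(rstart+i)*m + i].
*/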
413 static int MatGetDiagonal_MPIDense(Mat A,Vec v)
414 {
415   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
416   Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
417   int          ierr, len, i, n, m = a->m, radd;
418   Scalar       *x, zero = 0.0;
419 
420   VecSet(&zero,v);
421   ierr = VecGetArray(v,&x); CHKERRQ(ierr);
422   ierr = VecGetSize(v,&n); CHKERRQ(ierr);
423   if (n != a->M) SETERRQ(1,"MatGetDiagonal_MPIDense:Nonconforming mat and vec");
424   len = PetscMin(aloc->m,aloc->n);
425   radd = a->rstart*m;
426   for ( i=0; i<len; i++ ) {
427     x[i] = aloc->v[radd + i*m + i];
428   }
429   return 0;
430 }
431 
432 static int MatDestroy_MPIDense(PetscObject obj)
433 {
434   Mat          mat = (Mat) obj;
435   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
436   int          ierr;
437 
438 #if defined(PETSC_LOG)
439   PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N);
440 #endif
441   PetscFree(mdn->rowners);
442   ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
443   if (mdn->lvec)   VecDestroy(mdn->lvec);
444   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
445   if (mdn->factor) {
446     if (mdn->factor->temp)   PetscFree(mdn->factor->temp);
447     if (mdn->factor->tag)    PetscFree(mdn->factor->tag);
448     if (mdn->factor->pivots) PetscFree(mdn->factor->pivots);
449     PetscFree(mdn->factor);
450   }
451   PetscFree(mdn);
452   PLogObjectDestroy(mat);
453   PetscHeaderDestroy(mat);
454   return 0;
455 }
456 
457 #include "pinclude/pviewer.h"
458 
459 static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
460 {
461   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
462   int          ierr;
463 
464   if (mdn->size == 1) {
465     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
466   }
467   else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported");
468   return 0;
469 }
470 
471 static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
472 {
473   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
474   int          ierr, format;
475   FILE         *fd;
476   ViewerType   vtype;
477 
478   ViewerGetType(viewer,&vtype);
479   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
480   ierr = ViewerGetFormat(viewer,&format);
481   if (format == ASCII_FORMAT_INFO_DETAILED) {
482     int nz, nzalloc, mem, rank;
483     MPI_Comm_rank(mat->comm,&rank);
484     ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
485     PetscSequentialPhaseBegin(mat->comm,1);
486       fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",
487           rank,mdn->m,nz,nzalloc,mem);
488       fflush(fd);
489     PetscSequentialPhaseEnd(mat->comm,1);
490     ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
491     return 0;
492   }
493   else if (format == ASCII_FORMAT_INFO) {
494     return 0;
495   }
496   if (vtype == ASCII_FILE_VIEWER) {
497     PetscSequentialPhaseBegin(mat->comm,1);
498     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
499              mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
500     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
501     fflush(fd);
502     PetscSequentialPhaseEnd(mat->comm,1);
503   }
504   else {
505     int size = mdn->size, rank = mdn->rank;
506     if (size == 1) {
507       ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
508     }
509     else {
510       /* assemble the entire matrix onto first processor. */
511       Mat          A;
512       int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
513       Scalar       *vals;
514       Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;
515 
516       if (!rank) {
517         ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
518       }
519       else {
520         ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
521       }
522       PLogObjectParent(mat,A);
523 
524       /* Copy the matrix ... This isn't the most efficient means,
525          but it's quick for now */
526       row = mdn->rstart; m = Amdn->m;
527       for ( i=0; i<m; i++ ) {
528         ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
529         ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
530         ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
531         row++;
532       }
533 
534       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
535       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
536       if (!rank) {
537         ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
538       }
539       ierr = MatDestroy(A); CHKERRQ(ierr);
540     }
541   }
542   return 0;
543 }
544 
545 static int MatView_MPIDense(PetscObject obj,Viewer viewer)
546 {
547   Mat          mat = (Mat) obj;
548   int          ierr;
549   ViewerType   vtype;
550 
551   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
552   if (vtype == ASCII_FILE_VIEWER) {
553     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
554   }
555   else if (vtype == ASCII_FILES_VIEWER) {
556     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
557   }
558   else if (vtype == BINARY_FILE_VIEWER) {
559     return MatView_MPIDense_Binary(mat,viewer);
560   }
561   return 0;
562 }
563 
564 static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,int *nz,int *nzalloc,int *mem)
565 {
566   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
567   Mat          mdn = mat->A;
568   int          ierr, isend[3], irecv[3];
569 
570   ierr = MatGetInfo(mdn,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
571   if (flag == MAT_LOCAL) {
572     if (nz)      *nz      = isend[0];
573     if (nzalloc) *nzalloc = isend[1];
574     if (mem)     *mem     = isend[2];
575   } else if (flag == MAT_GLOBAL_MAX) {
576     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,A->comm);
577     if (nz)      *nz      = irecv[0];
578     if (nzalloc) *nzalloc = irecv[1];
579     if (mem)     *mem     = irecv[2];
580   } else if (flag == MAT_GLOBAL_SUM) {
581     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,A->comm);
582     if (nz)      *nz      = irecv[0];
583     if (nzalloc) *nzalloc = irecv[1];
584     if (mem)     *mem     = irecv[2];
585   }
586   return 0;
587 }
588 
589 /* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
590    extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
591    extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
592    extern int MatSolve_MPIDense(Mat,Vec,Vec);
593    extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
594    extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
595    extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */
596 
597 static int MatSetOption_MPIDense(Mat A,MatOption op)
598 {
599   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
600 
601   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
602       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
603       op == MAT_COLUMNS_SORTED ||
604       op == MAT_ROW_ORIENTED) {
605         MatSetOption(a->A,op);
606   }
607   else if (op == MAT_ROWS_SORTED ||
608            op == MAT_SYMMETRIC ||
609            op == MAT_STRUCTURALLY_SYMMETRIC ||
610            op == MAT_YES_NEW_DIAGONALS)
611     PLogInfo(A,"Info:MatSetOption_MPIDense:Option ignored\n");
612   else if (op == MAT_COLUMN_ORIENTED)
613     { a->roworiented = 0; MatSetOption(a->A,op);}
614   else if (op == MAT_NO_NEW_DIAGONALS)
615     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:MAT_NO_NEW_DIAGONALS");}
616   else
617     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");}
618   return 0;
619 }
620 
621 static int MatGetSize_MPIDense(Mat A,int *m,int *n)
622 {
623   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
624   *m = mat->M; *n = mat->N;
625   return 0;
626 }
627 
628 static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
629 {
630   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
631   *m = mat->m; *n = mat->N;
632   return 0;
633 }
634 
635 static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
636 {
637   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
638   *m = mat->rstart; *n = mat->rend;
639   return 0;
640 }
641 
642 static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
643 {
644   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
645   int          lrow, rstart = mat->rstart, rend = mat->rend;
646 
647   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:only local rows");
648   lrow = row - rstart;
649   return MatGetRow(mat->A,lrow,nz,idx,v);
650 }
651 
652 static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
653 {
654   if (idx) PetscFree(*idx);
655   if (v) PetscFree(*v);
656   return 0;
657 }
658 
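/*
   With more than one process: the Frobenius norm sums the squares of the
   local entries and reduces with MPI_SUM; the 1-norm forms local column
   sums, reduces them with MPI_SUM, and takes the largest; the infinity
   norm takes the local row sums (via the sequential MatNorm) and reduces
   with MPI_MAX.
*/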
659 static int MatNorm_MPIDense(Mat A,NormType type,double *norm)
660 {
661   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
662   Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
663   int          ierr, i, j;
664   double       sum = 0.0;
665   Scalar       *v = mat->v;
666 
667   if (mdn->size == 1) {
668     ierr =  MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
669   } else {
670     if (type == NORM_FROBENIUS) {
671       for (i=0; i<mat->n*mat->m; i++ ) {
672 #if defined(PETSC_COMPLEX)
673         sum += real(conj(*v)*(*v)); v++;
674 #else
675         sum += (*v)*(*v); v++;
676 #endif
677       }
678       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);
679       *norm = sqrt(*norm);
680       PLogFlops(2*mat->n*mat->m);
681     }
682     else if (type == NORM_1) {
683       double *tmp, *tmp2;
684       tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
685       tmp2 = tmp + mdn->N;
686       PetscMemzero(tmp,2*mdn->N*sizeof(double));
687       *norm = 0.0;
688       v = mat->v;
689       for ( j=0; j<mat->n; j++ ) {
690         for ( i=0; i<mat->m; i++ ) {
691           tmp[j] += PetscAbsScalar(*v);  v++;
692         }
693       }
694       MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);
695       for ( j=0; j<mdn->N; j++ ) {
696         if (tmp2[j] > *norm) *norm = tmp2[j];
697       }
698       PetscFree(tmp);
699       PLogFlops(mat->n*mat->m);
700     }
701     else if (type == NORM_INFINITY) { /* max row norm */
702       double ntemp;
703       ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
704       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);
705     }
706     else {
707       SETERRQ(1,"MatNorm_MPIDense:No support for two norm");
708     }
709   }
710   return 0;
711 }
712 
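/*
   Forms an explicit transpose: column j of the local block (a full global
   column, since each process stores all N columns) becomes row j of the
   new matrix B, inserted at global column indices rstart .. rstart+m-1.
*/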
713 static int MatTranspose_MPIDense(Mat A,Mat *matout)
714 {
715   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
716   Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
717   Mat          B;
718   int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
719   int          j, i, ierr;
720   Scalar       *v;
721 
722   if (matout == PETSC_NULL && M != N) {
723     SETERRQ(1,"MatTranspose_MPIDense:In-place transpose supported only for square matrices");
724   }
725   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);
726 
727   m = Aloc->m; n = Aloc->n; v = Aloc->v;
728   rwork = (int *) PetscMalloc(m*sizeof(int)); CHKPTRQ(rwork);
729   for ( j=0; j<n; j++ ) {
730     for (i=0; i<m; i++) rwork[i] = rstart + i;
731     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
732     v   += m;
733   }
734   PetscFree(rwork);
735   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
736   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
737   if (matout != PETSC_NULL) {
738     *matout = B;
739   } else {
740     /* This isn't really an in-place transpose, but free data struct from a */
741     PetscFree(a->rowners);
742     ierr = MatDestroy(a->A); CHKERRQ(ierr);
743     if (a->lvec) VecDestroy(a->lvec);
744     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
745     PetscFree(a);
746     PetscMemcpy(A,B,sizeof(struct _Mat));
747     PetscHeaderDestroy(B);
748   }
749   return 0;
750 }
751 
752 #include "pinclude/plapack.h"
753 static int MatScale_MPIDense(Scalar *alpha,Mat inA)
754 {
755   Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
756   Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
757   int          one = 1, nz;
758 
759   nz = a->m*a->n;
760   BLscal_( &nz, alpha, a->v, &one );
761   PLogFlops(nz);
762   return 0;
763 }
764 
765 static int MatConvertSameType_MPIDense(Mat,Mat *,int);
766 
767 /* -------------------------------------------------------------------*/
768 static struct _MatOps MatOps = {MatSetValues_MPIDense,
769        MatGetRow_MPIDense,MatRestoreRow_MPIDense,
770        MatMult_MPIDense,MatMultAdd_MPIDense,
771        MatMultTrans_MPIDense,MatMultTransAdd_MPIDense,
772 /*       MatSolve_MPIDense,0, */
773        0,0,
774        0,0,
775        0,0,
776 /*       MatLUFactor_MPIDense,0, */
777        0,MatTranspose_MPIDense,
778        MatGetInfo_MPIDense,0,
779        MatGetDiagonal_MPIDense,0,MatNorm_MPIDense,
780        MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
781        0,
782        MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
783        0,0,0,
784 /*       0,MatLUFactorSymbolic_MPIDense,MatLUFactorNumeric_MPIDense, */
785        0,0,
786        MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
787        MatGetOwnershipRange_MPIDense,
788        0,0, MatGetArray_MPIDense, MatRestoreArray_MPIDense,
789        0,MatConvertSameType_MPIDense,
790        0,0,0,0,0,
791        0,0,MatGetValues_MPIDense,0,0,MatScale_MPIDense};
792 
793 /*@C
794    MatCreateMPIDense - Creates a parallel matrix in dense format.
795 
796    Input Parameters:
797 .  comm - MPI communicator
798 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
799 .  n - number of local columns (or PETSC_DECIDE to have it calculated
800            if N is given)
801 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
802 .  N - number of global columns (or PETSC_DECIDE to have it calculated
803            if n is given)
804 .  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
805    to control all matrix memory allocation.
806 
807    Output Parameter:
808 .  A - the matrix
809 
810    Notes:
811    The dense format is fully compatible with standard Fortran 77
812    storage by columns.
813 
814    The data input variable is intended primarily for Fortran programmers
815    who wish to allocate their own matrix memory space.  Most users should
816    set data=PETSC_NULL.
817 
818    The user MUST specify either the local or global matrix dimensions
819    (possibly both).
820 
821    Currently, the only parallel dense matrix decomposition is by rows,
822    so that n=N and each submatrix owns all of the global columns.
823 
824 .keywords: matrix, dense, parallel
825 
826 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
827 @*/
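/*
   A minimal usage sketch (illustrative only; the 100 x 50 sizes, the
   single entry being set, and the communicator are hypothetical, and any
   process may set any entry since off-process values are stashed until
   assembly):

      Mat    A;
      int    ierr, row = 0, col = 0;
      Scalar value = 1.0;

      ierr = MatCreateMPIDense(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
                               100,50,PETSC_NULL,&A); CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,1,&col,&value,INSERT_VALUES); CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatDestroy(A); CHKERRQ(ierr);
*/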
828 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
829 {
830   Mat          mat;
831   Mat_MPIDense *a;
832   int          ierr, i,flg;
833 
834   /* Note:  For now, when data is specified above, this assumes the user correctly
835    allocates the local dense storage space.  We should add error checking. */
836 
837   *A = 0;
838   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm);
839   PLogObjectCreate(mat);
840   mat->data       = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
841   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
842   mat->destroy    = MatDestroy_MPIDense;
843   mat->view       = MatView_MPIDense;
844   mat->factor     = 0;
845 
846   a->factor     = 0;
847   a->insertmode = NOT_SET_VALUES;
848   MPI_Comm_rank(comm,&a->rank);
849   MPI_Comm_size(comm,&a->size);
850 
851   if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);
852   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
853 
854   /* each process stores all of the columns for its local rows */
855   if (N == PETSC_DECIDE) N = n;
856   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
857   /*  if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported"); */
858   a->N = mat->N = N;
859   a->M = mat->M = M;
860   a->m = mat->m = m;
861   a->n = mat->n = n;
862 
863   /* build local table of row and column ownerships */
864   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
865   a->cowners = a->rowners + a->size + 1;
866   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
867   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
868   a->rowners[0] = 0;
869   for ( i=2; i<=a->size; i++ ) {
870     a->rowners[i] += a->rowners[i-1];
871   }
872   a->rstart = a->rowners[a->rank];
873   a->rend   = a->rowners[a->rank+1];
874   MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
875   a->cowners[0] = 0;
876   for ( i=2; i<=a->size; i++ ) {
877     a->cowners[i] += a->cowners[i-1];
878   }
879 
880   ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr);
881   PLogObjectParent(mat,a->A);
882 
883   /* build cache for stashing off-processor entries */
884   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
885 
886   /* stuff used for matrix vector multiply */
887   a->lvec        = 0;
888   a->Mvctx       = 0;
889   a->roworiented = 1;
890 
891   *A = mat;
892   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
893   if (flg) {
894     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
895   }
896   return 0;
897 }
898 
899 static int MatConvertSameType_MPIDense(Mat A,Mat *newmat,int cpvalues)
900 {
901   Mat          mat;
902   Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
903   int          ierr;
904   FactorCtx    *factor;
905 
906   *newmat       = 0;
907   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm);
908   PLogObjectCreate(mat);
909   mat->data      = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
910   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
911   mat->destroy   = MatDestroy_MPIDense;
912   mat->view      = MatView_MPIDense;
913   mat->factor    = A->factor;
914   mat->assembled = PETSC_TRUE;
915 
916   a->m = mat->m = oldmat->m;
917   a->n = mat->n = oldmat->n;
918   a->M = mat->M = oldmat->M;
919   a->N = mat->N = oldmat->N;
920   if (oldmat->factor) {
921     a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor);
922     /* copy factor contents ... add this code! */
923   } else a->factor = 0;
924 
925   a->rstart     = oldmat->rstart;
926   a->rend       = oldmat->rend;
927   a->size       = oldmat->size;
928   a->rank       = oldmat->rank;
929   a->insertmode = NOT_SET_VALUES;
930 
931   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
932   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
933   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
934   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
935 
936   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
937   PLogObjectParent(mat,a->lvec);
938   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
939   PLogObjectParent(mat,a->Mvctx);
940   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
941   PLogObjectParent(mat,a->A);
942   *newmat = mat;
943   return 0;
944 }
945 
946 #include "sys.h"
947 
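/*
   Loads a matrix that is stored in the file as a plain dense array of
   M*N values written row by row; each process's block of rows is read
   (or received from process 0) and transposed into the column-major
   local storage.
*/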
948 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
949 {
950   int        *rowners, i,size,rank,m,rstart,rend,ierr,nz,j;
951   Scalar     *array,*vals,*vals_ptr;
952   MPI_Status status;
953 
954   MPI_Comm_rank(comm,&rank);
955   MPI_Comm_size(comm,&size);
956 
957   /* determine ownership of all rows */
958   m = M/size + ((M % size) > rank);
959   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
960   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
961   rowners[0] = 0;
962   for ( i=2; i<=size; i++ ) {
963     rowners[i] += rowners[i-1];
964   }
965   rstart = rowners[rank];
966   rend   = rowners[rank+1];
967 
968   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
969   ierr = MatGetArray(*newmat,&array); CHKERRQ(ierr);
970 
971   if (!rank) {
972     vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);
973 
974     /* read in my part of the matrix numerical values  */
975     ierr = PetscBinaryRead(fd,vals,m*N,BINARY_SCALAR); CHKERRQ(ierr);
976 
977     /* insert into the matrix by row (this is why we cannot read directly into the array) */
978     vals_ptr = vals;
979     for ( i=0; i<m; i++ ) {
980       for ( j=0; j<N; j++ ) {
981         array[i + j*m] = *vals_ptr++;
982       }
983     }
984 
985     /* read in the other processors' portions and ship them out */
986     for ( i=1; i<size; i++ ) {
987       nz   = (rowners[i+1] - rowners[i])*N;
988       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
989       MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);
990     }
991   }
992   else {
993     /* receive numeric values */
994     vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);
995 
996     /* receive message of values*/
997     MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);
998 
999     /* insert into the matrix by row (this is why we cannot read directly into the array) */
1000     vals_ptr = vals;
1001     for ( i=0; i<m; i++ ) {
1002       for ( j=0; j<N; j++ ) {
1003         array[i + j*m] = *vals_ptr++;
1004       }
1005     }
1006   }
1007   PetscFree(rowners);
1008   PetscFree(vals);
1009   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1010   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1011   return 0;
1012 }
1013 
1014 
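/*
   Loads a matrix from a PETSc binary viewer.  The file begins with four
   ints (MAT_COOKIE, M, N, nz); if nz is MATRIX_BINARY_FORMAT_DENSE the
   values follow as a dense array, otherwise the row lengths, column
   indices, and values of a sparse representation follow and are fed
   through MatSetValues().
*/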
1015 int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
1016 {
1017   Mat          A;
1018   int          i, nz, ierr, j,rstart, rend, fd;
1019   Scalar       *vals,*svals;
1020   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1021   MPI_Status   status;
1022   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1023   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1024   int          tag = ((PetscObject)viewer)->tag;
1025 
1026   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1027   if (!rank) {
1028     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1029     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1030     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDense:not a matrix object");
1031   }
1032 
1033   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1034   M = header[1]; N = header[2]; nz = header[3];
1035 
1036   /*
1037        Handle case where matrix is stored on disk as a dense matrix
1038   */
1039   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1040     return MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);
1041   }
1042 
1043   /* determine ownership of all rows */
1044   m = M/size + ((M % size) > rank);
1045   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1046   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1047   rowners[0] = 0;
1048   for ( i=2; i<=size; i++ ) {
1049     rowners[i] += rowners[i-1];
1050   }
1051   rstart = rowners[rank];
1052   rend   = rowners[rank+1];
1053 
1054   /* distribute row lengths to all processors */
1055   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1056   offlens = ourlens + (rend-rstart);
1057   if (!rank) {
1058     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1059     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1060     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1061     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1062     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1063     PetscFree(sndcounts);
1064   }
1065   else {
1066     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1067   }
1068 
1069   if (!rank) {
1070     /* calculate the number of nonzeros on each processor */
1071     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1072     PetscMemzero(procsnz,size*sizeof(int));
1073     for ( i=0; i<size; i++ ) {
1074       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1075         procsnz[i] += rowlengths[j];
1076       }
1077     }
1078     PetscFree(rowlengths);
1079 
1080     /* determine max buffer needed and allocate it */
1081     maxnz = 0;
1082     for ( i=0; i<size; i++ ) {
1083       maxnz = PetscMax(maxnz,procsnz[i]);
1084     }
1085     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1086 
1087     /* read in my part of the matrix column indices  */
1088     nz = procsnz[0];
1089     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1090     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1091 
1092     /* read in everyone else's portion and ship it off */
1093     for ( i=1; i<size; i++ ) {
1094       nz = procsnz[i];
1095       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
1096       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1097     }
1098     PetscFree(cols);
1099   }
1100   else {
1101     /* determine buffer space needed for message */
1102     nz = 0;
1103     for ( i=0; i<m; i++ ) {
1104       nz += ourlens[i];
1105     }
1106     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1107 
1108     /* receive message of column indices*/
1109     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1110     MPI_Get_count(&status,MPI_INT,&maxnz);
1111     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDense:something is wrong with the file");
1112   }
1113 
1114   /* loop over local rows, determining number of off diagonal entries */
1115   PetscMemzero(offlens,m*sizeof(int));
1116   jj = 0;
1117   for ( i=0; i<m; i++ ) {
1118     for ( j=0; j<ourlens[i]; j++ ) {
1119       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1120       jj++;
1121     }
1122   }
1123 
1124   /* create our matrix (the ourlens adjustment below has no net effect for the dense case) */
1125   for ( i=0; i<m; i++ ) {
1126     ourlens[i] -= offlens[i];
1127   }
1128   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1129   A = *newmat;
1130   for ( i=0; i<m; i++ ) {
1131     ourlens[i] += offlens[i];
1132   }
1133 
1134   if (!rank) {
1135     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1136 
1137     /* read in my part of the matrix numerical values  */
1138     nz = procsnz[0];
1139     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1140 
1141     /* insert into matrix */
1142     jj      = rstart;
1143     smycols = mycols;
1144     svals   = vals;
1145     for ( i=0; i<m; i++ ) {
1146       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1147       smycols += ourlens[i];
1148       svals   += ourlens[i];
1149       jj++;
1150     }
1151 
1152     /* read in the other processors' portions and ship them out */
1153     for ( i=1; i<size; i++ ) {
1154       nz = procsnz[i];
1155       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1156       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1157     }
1158     PetscFree(procsnz);
1159   }
1160   else {
1161     /* receive numeric values */
1162     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1163 
1164     /* receive message of values*/
1165     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1166     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1167     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDense:something is wrong with the file");
1168 
1169     /* insert into matrix */
1170     jj      = rstart;
1171     smycols = mycols;
1172     svals   = vals;
1173     for ( i=0; i<m; i++ ) {
1174       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1175       smycols += ourlens[i];
1176       svals   += ourlens[i];
1177       jj++;
1178     }
1179   }
1180   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1181 
1182   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1183   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1184   return 0;
1185 }
1186 