xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 39ddd567ee2cd5629f9b2e9aadacf11d968d0842)
1 #ifndef lint
2 static char vcid[] = "$Id: mpidense.c,v 1.1 1995/10/19 22:14:22 curfman Exp curfman $";
3 #endif
4 
5 #include "mpidense.h"
6 #include "vec/vecimpl.h"
7 #include "inline/spops.h"
8 
9 static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,
10                                int *idxn,Scalar *v,InsertMode addv)
11 {
12   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
13   int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;
14 
15   if (mdn->insertmode != NOT_SET_VALUES && mdn->insertmode != addv) {
16     SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds");
17   }
18   mdn->insertmode = addv;
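  /* entries for locally owned rows go directly into the local dense matrix;
     entries for rows owned by other processors are stashed and sent to their
     owners during MatAssemblyBegin/End */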
19   for ( i=0; i<m; i++ ) {
20     if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row");
21     if (idxm[i] >= mdn->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large");
22     if (idxm[i] >= rstart && idxm[i] < rend) {
23       row = idxm[i] - rstart;
24       for ( j=0; j<n; j++ ) {
25         if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column");
26         if (idxn[j] >= mdn->N)
27           SETERRQ(1,"MatSetValues_MPIDense:Column too large");
28         ierr = MatSetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j,addv);
29         CHKERRQ(ierr);
30       }
31     }
32     else {
33       ierr = StashValues_Private(&mdn->stash,idxm[i],n,idxn,v+i*n,addv);
34       CHKERRQ(ierr);
35     }
36   }
37   return 0;
38 }
39 
40 static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
41 {
42   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
43   MPI_Comm     comm = mat->comm;
44   int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
45   int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
46   int          tag = mat->tag, *owner,*starts,count,ierr;
47   InsertMode   addv;
48   MPI_Request  *send_waits,*recv_waits;
49   Scalar       *rvalues,*svalues;
50 
51   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
52   MPI_Allreduce((void *) &mdn->insertmode,(void *) &addv,1,MPI_INT,
53                 MPI_BOR,comm);
54   if (addv == (ADD_VALUES|INSERT_VALUES)) { SETERRQ(1,
55     "MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs");
56     }
57   mdn->insertmode = addv; /* in case this processor had no cache */
58 
59   /*  first count number of contributors to each processor */
60   nprocs = (int *) PETSCMALLOC( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
61   PetscZero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
62   owner = (int *) PETSCMALLOC( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
63   for ( i=0; i<mdn->stash.n; i++ ) {
64     idx = mdn->stash.idx[i];
65     for ( j=0; j<size; j++ ) {
66       if (idx >= owners[j] && idx < owners[j+1]) {
67         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
68       }
69     }
70   }
71   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
72 
73   /* inform other processors of number of messages and max length*/
74   work = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(work);
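  /* after the first (SUM) reduction work[rank] is the number of messages this
     processor will receive; after the second (MAX) reduction it is the length,
     in (row,column,value) triples, of the longest such message */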
75   MPI_Allreduce((void *) procs,(void *) work,size,MPI_INT,MPI_SUM,comm);
76   nreceives = work[rank];
77   MPI_Allreduce((void *) nprocs,(void *) work,size,MPI_INT,MPI_MAX,comm);
78   nmax = work[rank];
79   PETSCFREE(work);
80 
81   /* post receives:
82        1) each message will consist of ordered triples
83      (global row index,global column index,value); we store the indices
84      as Scalars to simplify the message passing.
85        2) since we don't know how long each individual message is we
86      allocate the largest needed buffer for each receive. Potentially
87      this is a lot of wasted space.
88 
89        This could be done better.
90   */
91   rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
92   CHKPTRQ(rvalues);
93   recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request));
94   CHKPTRQ(recv_waits);
95   for ( i=0; i<nreceives; i++ ) {
96     MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
97               comm,recv_waits+i);
98   }
99 
100   /* do sends:
101       1) starts[i] gives the starting index in svalues for stuff going to
102          the ith processor
103   */
104   svalues = (Scalar *) PETSCMALLOC( 3*(mdn->stash.n+1)*sizeof(Scalar) );
105   CHKPTRQ(svalues);
106   send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
107   CHKPTRQ(send_waits);
108   starts = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(starts);
109   starts[0] = 0;
110   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
111   for ( i=0; i<mdn->stash.n; i++ ) {
112     svalues[3*starts[owner[i]]]       = (Scalar)  mdn->stash.idx[i];
113     svalues[3*starts[owner[i]]+1]     = (Scalar)  mdn->stash.idy[i];
114     svalues[3*(starts[owner[i]]++)+2] =  mdn->stash.array[i];
115   }
116   PETSCFREE(owner);
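  /* the packing loop above advanced starts[], so rebuild the offsets before
     posting the sends */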
117   starts[0] = 0;
118   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
119   count = 0;
120   for ( i=0; i<size; i++ ) {
121     if (procs[i]) {
122       MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag,
123                 comm,send_waits+count++);
124     }
125   }
126   PETSCFREE(starts); PETSCFREE(nprocs);
127 
128   /* Free cache space */
129   ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);
130 
131   mdn->svalues    = svalues;    mdn->rvalues = rvalues;
132   mdn->nsends     = nsends;     mdn->nrecvs = nreceives;
133   mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
134   mdn->rmax       = nmax;
135 
136   return 0;
137 }
138 extern int MatSetUpMultiply_MPIDense(Mat);
139 
140 static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
141 {
142   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
143   MPI_Status   *send_status,recv_status;
144   int          imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col;
145   Scalar       *values,val;
146   InsertMode   addv = mdn->insertmode;
147 
148   /*  wait on receives */
149   while (count) {
150     MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);
151     /* unpack receives into our local space */
152     values = mdn->rvalues + 3*imdex*mdn->rmax;
153     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
154     n = n/3;
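    /* the message is a run of (row,column,value) triples stored as Scalars;
       n is now the number of triples */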
155     for ( i=0; i<n; i++ ) {
156       row = (int) PETSCREAL(values[3*i]) - mdn->rstart;
157       col = (int) PETSCREAL(values[3*i+1]);
158       val = values[3*i+2];
159       if (col >= 0 && col < mdn->N) {
160         MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
161       }
162       else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");}
163     }
164     count--;
165   }
166   PETSCFREE(mdn->recv_waits); PETSCFREE(mdn->rvalues);
167 
168   /* wait on sends */
169   if (mdn->nsends) {
170     send_status = (MPI_Status *) PETSCMALLOC( mdn->nsends*sizeof(MPI_Status) );
171     CHKPTRQ(send_status);
172     MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);
173     PETSCFREE(send_status);
174   }
175   PETSCFREE(mdn->send_waits); PETSCFREE(mdn->svalues);
176 
177   mdn->insertmode = NOT_SET_VALUES;
178   ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
179   ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);
180 
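  /* on the first final assembly, build the work vector and scatter context
     used by MatMult and MatMultAdd */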
181   if (!mdn->assembled && mode == FINAL_ASSEMBLY) {
182     ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
183   }
184   mdn->assembled = 1;
185   return 0;
186 }
187 
188 static int MatZeroEntries_MPIDense(Mat A)
189 {
190   Mat_MPIDense *l = (Mat_MPIDense *) A->data;
191   return MatZeroEntries(l->A);
192 }
193 
194 /* the code does not do the diagonal entries correctly unless the
195    matrix is square and the column and row ownerships are identical.
196    This is a BUG. The only way to fix it seems to be to access
197    the local matrix l->A directly and not through the MatZeroRows()
198    routine.
199 */
200 static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
201 {
202   Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
203   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
204   int            *procs,*nprocs,j,found,idx,nsends,*work;
205   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
206   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
207   int            *lens,imdex,*lrows,*values;
208   MPI_Comm       comm = A->comm;
209   MPI_Request    *send_waits,*recv_waits;
210   MPI_Status     recv_status,*send_status;
211   IS             istmp;
212 
213   if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIDense:Must assemble matrix");
214   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
215   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
216 
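  /* each processor ships the global numbers of the rows it wants zeroed to
     the processors that own them; the owners then zero those rows locally */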
217   /*  first count number of contributors to each processor */
218   nprocs = (int *) PETSCMALLOC( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
219   PetscZero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
220   owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
221   for ( i=0; i<N; i++ ) {
222     idx = rows[i];
223     found = 0;
224     for ( j=0; j<size; j++ ) {
225       if (idx >= owners[j] && idx < owners[j+1]) {
226         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
227       }
228     }
229     if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range");
230   }
231   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
232 
233   /* inform other processors of number of messages and max length*/
234   work = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(work);
235   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
236   nrecvs = work[rank];
237   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
238   nmax = work[rank];
239   PETSCFREE(work);
240 
241   /* post receives:   */
242   rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
243   CHKPTRQ(rvalues);
244   recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request));
245   CHKPTRQ(recv_waits);
246   for ( i=0; i<nrecvs; i++ ) {
247     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
248   }
249 
250   /* do sends:
251       1) starts[i] gives the starting index in svalues for stuff going to
252          the ith processor
253   */
254   svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
255   send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
256   CHKPTRQ(send_waits);
257   starts = (int *) PETSCMALLOC( (size+1)*sizeof(int) ); CHKPTRQ(starts);
258   starts[0] = 0;
259   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
260   for ( i=0; i<N; i++ ) {
261     svalues[starts[owner[i]]++] = rows[i];
262   }
263   ISRestoreIndices(is,&rows);
264 
265   starts[0] = 0;
266   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
267   count = 0;
268   for ( i=0; i<size; i++ ) {
269     if (procs[i]) {
270       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
271     }
272   }
273   PETSCFREE(starts);
274 
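  /* base converts the global row numbers received below into local row numbers */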
275   base = owners[rank];
276 
277   /*  wait on receives */
278   lens   = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
279   source = lens + nrecvs;
280   count  = nrecvs; slen = 0;
281   while (count) {
282     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
283     /* unpack receives into our local space */
284     MPI_Get_count(&recv_status,MPI_INT,&n);
285     source[imdex]  = recv_status.MPI_SOURCE;
286     lens[imdex]  = n;
287     slen += n;
288     count--;
289   }
290   PETSCFREE(recv_waits);
291 
292   /* gather the received row numbers into a single local list */
293   lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
294   count = 0;
295   for ( i=0; i<nrecvs; i++ ) {
296     values = rvalues + i*nmax;
297     for ( j=0; j<lens[i]; j++ ) {
298       lrows[count++] = values[j] - base;
299     }
300   }
301   PETSCFREE(rvalues); PETSCFREE(lens);
302   PETSCFREE(owner); PETSCFREE(nprocs);
303 
304   /* actually zap the local rows */
305   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
306   PLogObjectParent(A,istmp);
307   PETSCFREE(lrows);
308   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
309   ierr = ISDestroy(istmp); CHKERRQ(ierr);
310 
311   /* wait on sends */
312   if (nsends) {
313     send_status = (MPI_Status *) PETSCMALLOC(nsends*sizeof(MPI_Status));
314     CHKPTRQ(send_status);
315     MPI_Waitall(nsends,send_waits,send_status);
316     PETSCFREE(send_status);
317   }
318   PETSCFREE(send_waits); PETSCFREE(svalues);
319 
320   return 0;
321 }
322 
323 static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
324 {
325   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
326   int          ierr;
327   if (!mdn->assembled)
328     SETERRQ(1,"MatMult_MPIDense:Must assemble matrix first");
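  /* each processor stores all N columns of its rows, so the entire input vector
     is first gathered into the local work vector lvec; the local dense block
     then produces this processor's piece of yy */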
329   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
330   CHKERRQ(ierr);
331   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
332   CHKERRQ(ierr);
333   ierr = MatMult(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
334   return 0;
335 }
336 
337 static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
338 {
339   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
340   int          ierr;
341   if (!mdn->assembled)
342     SETERRQ(1,"MatMultAdd_MPIDense:Must assemble matrix first");
343   ierr = VecScatterBegin(xx,mdn->lvec,ADD_VALUES,SCATTER_ALL,mdn->Mvctx);
344   CHKERRQ(ierr);
345   ierr = VecScatterEnd(xx,mdn->lvec,ADD_VALUES,SCATTER_ALL,mdn->Mvctx);
346   CHKERRQ(ierr);
347   ierr = MatMultAdd(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
348   return 0;
349 }
350 
351 /*
352   This only works correctly for square matrices where the subblock a->A is the
353    diagonal block.
354 */
355 static int MatGetDiagonal_MPIDense(Mat A,Vec v)
356 {
357   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
358   if (!a->assembled) SETERRQ(1,"MatGetDiagonal_MPIDense:Must assemble matrix");
359   return MatGetDiagonal(a->A,v);
360 }
361 
362 static int MatDestroy_MPIDense(PetscObject obj)
363 {
364   Mat          mat = (Mat) obj;
365   Mat_MPIDense *aij = (Mat_MPIDense *) mat->data;
366   int          ierr;
367 #if defined(PETSC_LOG)
368   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
369 #endif
370   PETSCFREE(aij->rowners);
371   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
372   if (aij->lvec)   VecDestroy(aij->lvec);
373   if (aij->Mvctx)  VecScatterCtxDestroy(aij->Mvctx);
374   PETSCFREE(aij);
375   PLogObjectDestroy(mat);
376   PETSCHEADERDESTROY(mat);
377   return 0;
378 }
379 
380 #include "pinclude/pviewer.h"
381 
382 static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
383 {
384   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
385   int          ierr;
386   if (mdn->size == 1) {
387     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
388   }
389   else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported");
390   return 0;
391 }
392 
393 static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
394 {
395   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
396   int          ierr, format;
397   PetscObject  vobj = (PetscObject) viewer;
398   FILE         *fd;
399 
400   if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
401     ierr = ViewerFileGetFormat_Private(viewer,&format); CHKERRQ(ierr);
402     if (format == FILE_FORMAT_INFO) {
403       ; /* do nothing for now */
404     }
405   }
406 
407   if (vobj->type == ASCII_FILE_VIEWER) {
408     ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
409     MPIU_Seq_begin(mat->comm,1);
410     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
411              mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
412     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
413     fflush(fd);
414     MPIU_Seq_end(mat->comm,1);
415   }
416   else {
417     int size = mdn->size, rank = mdn->rank;
418     if (size == 1) {
419       ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
420     }
421     else {
422       /* assemble the entire matrix onto first processor. */
423       Mat       A;
424       int       M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
425       Scalar    *vals;
426       Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;
427 
428       if (!rank) {
429         ierr = MatCreateMPIDense(mat->comm,M,N,M,N,&A); CHKERRQ(ierr);
430       }
431       else {
432         ierr = MatCreateMPIDense(mat->comm,0,N,M,N,&A); CHKERRQ(ierr);
433       }
434       PLogObjectParent(mat,A);
435 
436       /* Copy the matrix ... This isn't the most efficient means,
437          but it's quick for now */
438       row = mdn->rstart; m = Amdn->m;
439       for ( i=0; i<m; i++ ) {
440         ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
441         ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
442         ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
443         row++;
444       }
445 
446       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
447       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
448       if (!rank) {
449         ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
450       }
451       ierr = MatDestroy(A); CHKERRQ(ierr);
452     }
453   }
454   return 0;
455 }
456 
457 static int MatView_MPIDense(PetscObject obj,Viewer viewer)
458 {
459   Mat          mat = (Mat) obj;
460   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
461   PetscObject  vobj = (PetscObject) viewer;
462   int          ierr;
463 
464   if (!mdn->assembled) SETERRQ(1,"MatView_MPIDense:must assemble matrix");
465   if (!viewer) {
466     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
467   }
468   if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
469     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
470   }
471   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
472     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
473   }
474   else if (vobj->type == BINARY_FILE_VIEWER) {
475     return MatView_MPIDense_Binary(mat,viewer);
476   }
477   return 0;
478 }
479 
480 static int MatGetInfo_MPIDense(Mat matin,MatInfoType flag,int *nz,
481                              int *nzalloc,int *mem)
482 {
483   Mat_MPIDense *mat = (Mat_MPIDense *) matin->data;
484   Mat          A = mat->A;
485   int          ierr, isend[3], irecv[3];
486 
487   ierr = MatGetInfo(A,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
488   if (flag == MAT_LOCAL) {
489     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
490   } else if (flag == MAT_GLOBAL_MAX) {
491     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
492     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
493   } else if (flag == MAT_GLOBAL_SUM) {
494     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
495     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
496   }
497   return 0;
498 }
499 
500 static int MatSetOption_MPIDense(Mat A,MatOption op)
501 {
502   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
503 
504   if (op == NO_NEW_NONZERO_LOCATIONS ||
505       op == YES_NEW_NONZERO_LOCATIONS ||
506       op == COLUMNS_SORTED ||
507       op == ROW_ORIENTED) {
508         MatSetOption(a->A,op);
509   }
510   else if (op == ROWS_SORTED ||
511            op == SYMMETRIC_MATRIX ||
512            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
513            op == YES_NEW_DIAGONALS)
514     PLogInfo((PetscObject)A,"Info:MatSetOption_MPIDense:Option ignored\n");
515   else if (op == COLUMN_ORIENTED)
516     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:COLUMN_ORIENTED");}
517   else if (op == NO_NEW_DIAGONALS)
518     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:NO_NEW_DIAGONALS");}
519   else
520     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");}
521   return 0;
522 }
523 
524 static int MatGetSize_MPIDense(Mat matin,int *m,int *n)
525 {
526   Mat_MPIDense *mat = (Mat_MPIDense *) matin->data;
527   *m = mat->M; *n = mat->N;
528   return 0;
529 }
530 
531 static int MatGetLocalSize_MPIDense(Mat matin,int *m,int *n)
532 {
533   Mat_MPIDense *mat = (Mat_MPIDense *) matin->data;
534   *m = mat->m; *n = mat->N;
535   return 0;
536 }
537 
538 static int MatGetOwnershipRange_MPIDense(Mat matin,int *m,int *n)
539 {
540   Mat_MPIDense *mat = (Mat_MPIDense *) matin->data;
541   *m = mat->rstart; *n = mat->rend;
542   return 0;
543 }
544 
545 static int MatGetRow_MPIDense(Mat matin,int row,int *nz,int **idx,Scalar **v)
546 {
547   Mat_MPIDense *mat = (Mat_MPIDense *) matin->data;
548   int          lrow, rstart = mat->rstart, rend = mat->rend;
549 
550   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:only local rows");
551   lrow = row - rstart;
552   return MatGetRow(mat->A,lrow,nz,idx,v);
553 }
554 
555 static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
556 {
557   if (idx) PETSCFREE(*idx);
558   if (v) PETSCFREE(*v);
559   return 0;
560 }
561 
562 static int MatCopyPrivate_MPIDense(Mat,Mat *);
563 
564 /* -------------------------------------------------------------------*/
565 static struct _MatOps MatOps = {MatSetValues_MPIDense,
566        MatGetRow_MPIDense,MatRestoreRow_MPIDense,
567        MatMult_MPIDense,MatMultAdd_MPIDense,
568        0, 0,
569        0, 0,
570        0, 0, 0,
571        0, 0, 0,
572        MatGetInfo_MPIDense, 0,
573        MatGetDiagonal_MPIDense, 0, 0,
574        MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
575        0,
576        MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
577        0,
578        0, 0, 0, 0,
579        MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
580        MatGetOwnershipRange_MPIDense,
581        0, 0,
582        0, 0, 0, 0, 0, MatCopyPrivate_MPIDense};
583 
584 /*@C
585    MatCreateMPIDense - Creates a parallel matrix in dense format.
586 
587    Input Parameters:
588 .  comm - MPI communicator
589 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
590 .  n - number of local columns (or PETSC_DECIDE to have it calculated
591            if N is given)
592 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
593 .  N - number of global columns (or PETSC_DECIDE to have it calculated
594            if n is given)
595 
596    Output Parameter:
597 .  newmat - the matrix
598 
599    Notes:
600    The dense format is fully compatible with standard Fortran 77
601    storage by columns.
602 
603    The user MUST specify either the local or global matrix dimensions
604    (possibly both).
605 
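   Example of use (a sketch only; the dimensions are arbitrary and error
   checking of the returned codes is omitted):

      Mat    A;
      int    row = 0, col = 0;
      Scalar value = 1.0;
      MatCreateMPIDense(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,10,10,&A);
      MatSetValues(A,1,&row,1,&col,&value,INSERT_VALUES);
      MatAssemblyBegin(A,FINAL_ASSEMBLY);
      MatAssemblyEnd(A,FINAL_ASSEMBLY);
      MatDestroy(A);
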
606 .keywords: matrix, dense, parallel
607 
608 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
609 @*/
610 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Mat *newmat)
611 {
612   Mat          mat;
613   Mat_MPIDense *a;
614   int          ierr, i;
615 
616   *newmat         = 0;
617   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm);
618   PLogObjectCreate(mat);
619   mat->data       = (void *) (a = PETSCNEW(Mat_MPIDense)); CHKPTRQ(a);
620   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
621   mat->destroy    = MatDestroy_MPIDense;
622   mat->view       = MatView_MPIDense;
623   mat->factor     = 0;
624 
625   a->insertmode = NOT_SET_VALUES;
626   MPI_Comm_rank(comm,&a->rank);
627   MPI_Comm_size(comm,&a->size);
628 
629   if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);
630   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
631 
632   /* each row stores all columns */
633   if (N == PETSC_DECIDE) N = n;
634   if (n == PETSC_DECIDE) n = N;
635   if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported");
636   a->N = N;
637   a->M = M;
638   a->m = m;
639   a->n = n;
640 
641   /* build local table of row and column ownerships */
642   a->rowners = (int *) PETSCMALLOC((a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
643   PLogObjectMemory(mat,(a->size+2)*sizeof(int)+sizeof(struct _Mat)+
644                        sizeof(Mat_MPIDense));
645   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
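  /* turn the gathered local row counts into a prefix sum: rowners[i] is the
     first global row owned by processor i */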
646   a->rowners[0] = 0;
647   for ( i=2; i<=a->size; i++ ) {
648     a->rowners[i] += a->rowners[i-1];
649   }
650   a->rstart = a->rowners[a->rank];
651   a->rend   = a->rowners[a->rank+1];
652 
653   ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,&a->A); CHKERRQ(ierr);
654   PLogObjectParent(mat,a->A);
655 
656   /* build cache (stash) for entries destined for rows owned by other processors */
657   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
658 
659   /* work vector and scatter context used for matrix-vector multiply */
660   a->lvec      = 0;
661   a->Mvctx     = 0;
662   a->assembled = 0;
663 
664   *newmat = mat;
665   return 0;
666 }
667 
668 static int MatCopyPrivate_MPIDense(Mat matin,Mat *newmat)
669 {
670   Mat        mat;
671   Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) matin->data;
672   int        ierr;
673 
674   if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIDense:Must assemble matrix");
675   *newmat       = 0;
676   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIDENSE,matin->comm);
677   PLogObjectCreate(mat);
678   mat->data     = (void *) (a = PETSCNEW(Mat_MPIDense)); CHKPTRQ(a);
679   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
680   mat->destroy  = MatDestroy_MPIDense;
681   mat->view     = MatView_MPIDense;
682   mat->factor   = matin->factor;
683 
684   a->m          = oldmat->m;
685   a->n          = oldmat->n;
686   a->M          = oldmat->M;
687   a->N          = oldmat->N;
688 
689   a->assembled  = 1;
690   a->rstart     = oldmat->rstart;
691   a->rend       = oldmat->rend;
692   a->size       = oldmat->size;
693   a->rank       = oldmat->rank;
694   a->insertmode = NOT_SET_VALUES;
695 
696   a->rowners = (int *) PETSCMALLOC((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
697   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
698   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
699   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
700 
701   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
702   PLogObjectParent(mat,a->lvec);
703   ierr =  VecScatterCtxCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
704   PLogObjectParent(mat,a->Mvctx);
705   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
706   PLogObjectParent(mat,a->A);
707   *newmat = mat;
708   return 0;
709 }
710 
711 #include "sysio.h"
712 
713 int MatLoad_MPIDense(Viewer bview,MatType type,Mat *newmat)
714 {
715   Mat          A;
716   int          i, nz, ierr, j,rstart, rend, fd;
717   Scalar       *vals,*svals;
718   PetscObject  vobj = (PetscObject) bview;
719   MPI_Comm     comm = vobj->comm;
720   MPI_Status   status;
721   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
722   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
723   int          tag = ((PetscObject)bview)->tag;
724 
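  /* the file is assumed to begin with a four integer header (MAT_COOKIE, M, N,
     and a fourth entry that is unused here), followed by the row lengths, the
     column indices, and the numerical values; only processor 0 reads the file
     and ships each processor its piece */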
725   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
726   if (!rank) {
727     ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
728     ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
729     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDense:not matrix object");
730   }
731 
732   MPI_Bcast(header+1,3,MPI_INT,0,comm);
733   M = header[1]; N = header[2];
734   /* determine ownership of all rows */
735   m = M/size + ((M % size) > rank);
736   rowners = (int *) PETSCMALLOC((size+2)*sizeof(int)); CHKPTRQ(rowners);
737   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
738   rowners[0] = 0;
739   for ( i=2; i<=size; i++ ) {
740     rowners[i] += rowners[i-1];
741   }
742   rstart = rowners[rank];
743   rend   = rowners[rank+1];
744 
745   /* distribute row lengths to all processors */
746   ourlens = (int*) PETSCMALLOC( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
747   offlens = ourlens + (rend-rstart);
748   if (!rank) {
749     rowlengths = (int*) PETSCMALLOC( M*sizeof(int) ); CHKPTRQ(rowlengths);
750     ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
751     sndcounts = (int*) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(sndcounts);
752     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
753     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
754     PETSCFREE(sndcounts);
755   }
756   else {
757     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
758   }
759 
760   if (!rank) {
761     /* calculate the number of nonzeros on each processor */
762     procsnz = (int*) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(procsnz);
763     PetscZero(procsnz,size*sizeof(int));
764     for ( i=0; i<size; i++ ) {
765       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
766         procsnz[i] += rowlengths[j];
767       }
768     }
769     PETSCFREE(rowlengths);
770 
771     /* determine max buffer needed and allocate it */
772     maxnz = 0;
773     for ( i=0; i<size; i++ ) {
774       maxnz = PETSCMAX(maxnz,procsnz[i]);
775     }
776     cols = (int *) PETSCMALLOC( maxnz*sizeof(int) ); CHKPTRQ(cols);
777 
778     /* read in my part of the matrix column indices  */
779     nz = procsnz[0];
780     mycols = (int *) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols);
781     ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);
782 
783     /* read in everyone else's and ship them off */
784     for ( i=1; i<size; i++ ) {
785       nz = procsnz[i];
786       ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
787       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
788     }
789     PETSCFREE(cols);
790   }
791   else {
792     /* determine buffer space needed for message */
793     nz = 0;
794     for ( i=0; i<m; i++ ) {
795       nz += ourlens[i];
796     }
797     mycols = (int*) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols);
798 
799     /* receive message of column indices*/
800     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
801     MPI_Get_count(&status,MPI_INT,&maxnz);
802     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDense:something is wrong with file");
803   }
804 
805   /* loop over local rows, determining the number of off-diagonal entries */
806   PetscZero(offlens,m*sizeof(int));
807   jj = 0;
808   for ( i=0; i<m; i++ ) {
809     for ( j=0; j<ourlens[i]; j++ ) {
810       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
811       jj++;
812     }
813   }
814 
815   /* create our matrix */
816   for ( i=0; i<m; i++ ) {
817     ourlens[i] -= offlens[i];
818   }
819   if (type == MATMPIDENSE) {
820     ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,newmat);CHKERRQ(ierr);
821   }
822   A = *newmat;
823   MatSetOption(A,COLUMNS_SORTED);
824   for ( i=0; i<m; i++ ) {
825     ourlens[i] += offlens[i];
826   }
827 
828   if (!rank) {
829     vals = (Scalar *) PETSCMALLOC( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
830 
831     /* read in my part of the matrix numerical values  */
832     nz = procsnz[0];
833     ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
834 
835     /* insert into matrix */
836     jj      = rstart;
837     smycols = mycols;
838     svals   = vals;
839     for ( i=0; i<m; i++ ) {
840       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
841       smycols += ourlens[i];
842       svals   += ourlens[i];
843       jj++;
844     }
845 
846     /* read in other processors and ship out */
847     for ( i=1; i<size; i++ ) {
848       nz = procsnz[i];
849       ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
850       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
851     }
852     PETSCFREE(procsnz);
853   }
854   else {
855     /* receive numeric values */
856     vals = (Scalar*) PETSCMALLOC( nz*sizeof(Scalar) ); CHKPTRQ(vals);
857 
858     /* receive message of values*/
859     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
860     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
861     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDense:something is wrong with file");
862 
863     /* insert into matrix */
864     jj      = rstart;
865     smycols = mycols;
866     svals   = vals;
867     for ( i=0; i<m; i++ ) {
868       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
869       smycols += ourlens[i];
870       svals   += ourlens[i];
871       jj++;
872     }
873   }
874   PETSCFREE(ourlens); PETSCFREE(vals); PETSCFREE(mycols); PETSCFREE(rowners);
875 
876   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
877   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
878   return 0;
879 }
880