xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 052cd4255ec3f7ee9bedc246b8da08d832e6dd78)
1 #ifndef lint
2 static char vcid[] = "$Id: mpidense.c,v 1.14 1995/11/29 22:08:17 curfman Exp curfman $";
3 #endif
4 
5 /*
6    Basic functions for basic parallel dense matrices.
7 */
8 
9 #include "mpidense.h"
10 #include "vec/vecimpl.h"
11 #include "inline/spops.h"
12 
13 static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,
14                                int *idxn,Scalar *v,InsertMode addv)
15 {
16   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
17   int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;
18 
19   if (mdn->insertmode != NOT_SET_VALUES && mdn->insertmode != addv) {
20     SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds");
21   }
22   mdn->insertmode = addv;
23   for ( i=0; i<m; i++ ) {
24     if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row");
25     if (idxm[i] >= mdn->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large");
26     if (idxm[i] >= rstart && idxm[i] < rend) {
27       row = idxm[i] - rstart;
28       for ( j=0; j<n; j++ ) {
29         if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column");
30         if (idxn[j] >= mdn->N) SETERRQ(1,"MatSetValues_MPIDense:Column too large");
31         ierr = MatSetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j,addv); CHKERRQ(ierr);
32       }
33     }
34     else {
35       ierr = StashValues_Private(&mdn->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr);
36     }
37   }
38   return 0;
39 }
40 
41 static int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
42 {
43   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
44   int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;
45 
46   if (!mdn->assembled) SETERRQ(1,"MatGetValues_MPIDense:Not for unassembled matrix");
47   for ( i=0; i<m; i++ ) {
48     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative row");
49     if (idxm[i] >= mdn->M) SETERRQ(1,"MatGetValues_MPIDense:Row too large");
50     if (idxm[i] >= rstart && idxm[i] < rend) {
51       row = idxm[i] - rstart;
52       for ( j=0; j<n; j++ ) {
53         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative column");
54         if (idxn[j] >= mdn->N)
55           SETERRQ(1,"MatGetValues_MPIDense:Column too large");
56         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr);
57       }
58     }
59     else {
60       SETERRQ(1,"MatGetValues_MPIDense:Only local values currently supported");
61     }
62   }
63   return 0;
64 }
65 
/*
   MatAssemblyBegin_MPIDense - Starts communication of stashed off-processor
   values.  Each stashed (row,col,value) triple is packed as three Scalars
   (indices converted to Scalar to allow a single message type) and sent to
   the owning processor with nonblocking MPI.  The matching receives are
   completed in MatAssemblyEnd_MPIDense.
*/
static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
  int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int          tag = mat->tag, *owner,*starts,count,ierr;
  InsertMode   addv;
  MPI_Request  *send_waits,*recv_waits;
  Scalar       *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce((void *) &mdn->insertmode,(void *) &addv,1,MPI_INT,
                MPI_BOR,comm);
  /* BOR of the two distinct mode values detects a mix across processors */
  if (addv == (ADD_VALUES|INSERT_VALUES)) { SETERRQ(1,
    "MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs");
    }
  mdn->insertmode = addv; /* in case this processor had no cache */

  /*  first count number of contributors to each processor */
  /* nprocs[j] = #stashed entries for proc j; procs[j] = 1 iff any go to j */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<mdn->stash.n; i++ ) {
    idx = mdn->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      /* owners[] is the row-ownership table: proc j owns [owners[j],owners[j+1]) */
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce((void *) procs,(void *) work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];                  /* # messages this proc will get */
  MPI_Allreduce((void *) nprocs,(void *) work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];                       /* longest message destined here */
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered pairs
     (global index,value) we store the global index as a double
     to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.

       This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    /* 3 Scalars per entry: row, column, value */
    MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar) );
  CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack; note starts[] is advanced as entries are placed, so it is rebuilt below */
  for ( i=0; i<mdn->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar)  mdn->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar)  mdn->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] =  mdn->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);

  /* stash the outstanding communication state for MatAssemblyEnd */
  mdn->svalues    = svalues;    mdn->rvalues = rvalues;
  mdn->nsends     = nsends;     mdn->nrecvs = nreceives;
  mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
  mdn->rmax       = nmax;

  return 0;
}
164 extern int MatSetUpMultiply_MPIDense(Mat);
165 
/*
   MatAssemblyEnd_MPIDense - Completes the communication started in
   MatAssemblyBegin_MPIDense, unpacks the received (row,col,value) triples
   into the local matrix, then assembles the local sequential matrix and
   (on final assembly) builds the matrix-vector scatter context.
*/
static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Status   *send_status,recv_status;
  int          imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col;
  Scalar       *values,val;
  InsertMode   addv = mdn->insertmode;

  /*  wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = mdn->rvalues + 3*imdex*mdn->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;                       /* 3 Scalars (row,col,value) per entry */
    for ( i=0; i<n; i++ ) {
      /* indices were sent as Scalars; convert back and make the row local */
      row = (int) PETSCREAL(values[3*i]) - mdn->rstart;
      col = (int) PETSCREAL(values[3*i+1]);
      val = values[3*i+2];
      if (col >= 0 && col < mdn->N) {
        MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
      }
      else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");}
    }
    count--;
  }
  PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues);

  /* wait on sends */
  if (mdn->nsends) {
    send_status = (MPI_Status *) PetscMalloc( mdn->nsends*sizeof(MPI_Status) );
    CHKPTRQ(send_status);
    MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(mdn->send_waits); PetscFree(mdn->svalues);

  mdn->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);

  /* on the first final assembly, set up the scatter used by MatMult */
  if (!mdn->assembled && mode == FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
  }
  mdn->assembled = 1;
  return 0;
}
213 
214 static int MatZeroEntries_MPIDense(Mat A)
215 {
216   Mat_MPIDense *l = (Mat_MPIDense *) A->data;
217   return MatZeroEntries(l->A);
218 }
219 
220 /* the code does not do the diagonal entries correctly unless the
221    matrix is square and the column and row ownerships are identical.
222    This is a BUG. The only way to fix it seems to be to access
223    mdn->A and mdn->B directly and not through the MatZeroRows()
224    routine.
225 */
/*
   MatZeroRows_MPIDense - Zeros the (globally numbered) rows listed in the
   index set, optionally placing diag on the diagonal.  Each processor may
   list any global rows; the row numbers are routed to their owners with
   nonblocking MPI, then each owner zeros its local rows via MatZeroRows on
   the local sequential matrix.
*/
static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIDense:Must assemble matrix");
  ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  /* nprocs[j] = #rows destined for proc j; procs[j] = 1 iff any are */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      /* proc j owns global rows [owners[j],owners[j+1]) */
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];                  /* # messages this proc will receive */
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];                    /* longest message destined here */
  PetscFree(work);

  /* post receives:   */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack; starts[] is advanced during packing, hence rebuilt below */
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];                  /* first global row owned locally */

  /*  wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;                          /* total # rows to zero locally */
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;  /* convert to local numbering */
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}
348 
349 static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
350 {
351   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
352   int          ierr;
353   if (!mdn->assembled)
354     SETERRQ(1,"MatMult_MPIDense:Must assemble matrix first");
355   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
356   CHKERRQ(ierr);
357   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
358   CHKERRQ(ierr);
359   ierr = MatMult(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
360   return 0;
361 }
362 
363 static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
364 {
365   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
366   int          ierr;
367   if (!mdn->assembled)
368     SETERRQ(1,"MatMultAdd_MPIDense:Must assemble matrix first");
369   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
370   CHKERRQ(ierr);
371   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
372   CHKERRQ(ierr);
373   ierr = MatMultAdd(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
374   return 0;
375 }
376 
377 static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
378 {
379   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
380   int          ierr;
381   Scalar       zero = 0.0;
382 
383   if (!a->assembled) SETERRQ(1,"MatMulTrans_MPIDense:must assemble matrix");
384   ierr = VecSet(&zero,yy); CHKERRQ(ierr);
385   ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr);
386   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
387          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
388   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
389          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
390   return 0;
391 }
392 
393 static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
394 {
395   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
396   int          ierr;
397 
398   if (!a->assembled) SETERRQ(1,"MatMulTransAdd_MPIDense:must assemble matrix");
399   ierr = VecCopy(yy,zz); CHKERRQ(ierr);
400   ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr);
401   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
402          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
403   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
404          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
405   return 0;
406 }
407 
408 static int MatGetDiagonal_MPIDense(Mat A,Vec v)
409 {
410   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
411   Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
412   int          ierr, i, n, m = a->m, radd;
413   Scalar       *x;
414 
415   if (!a->assembled) SETERRQ(1,"MatGetDiag_MPIDense:must assemble matrix");
416   ierr = VecGetArray(v,&x); CHKERRQ(ierr);
417   ierr = VecGetSize(v,&n); CHKERRQ(ierr);
418   if (n != a->M) SETERRQ(1,"MatGetDiagonal_SeqDense:Nonconforming mat and vec");
419   radd = a->rstart*m*m;
420   for ( i=0; i<m; i++ ) {
421     x[i] = aloc->v[radd + i*m + i];
422   }
423   return 0;
424 }
425 
/*
   MatDestroy_MPIDense - Frees the parallel wrapper and everything it owns:
   the ownership table, the local sequential matrix, the multiply work
   vector and scatter context, and finally the header itself.
*/
static int MatDestroy_MPIDense(PetscObject obj)
{
  Mat          mat = (Mat) obj;
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  PetscFree(mdn->rowners);
  ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
  /* lvec/Mvctx are created lazily by MatSetUpMultiply, so may be null */
  if (mdn->lvec)   VecDestroy(mdn->lvec);
  if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) PetscFree(mdn->factor);
  PetscFree(mdn);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}
445 
446 #include "pinclude/pviewer.h"
447 
448 static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
449 {
450   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
451   int          ierr;
452   if (mdn->size == 1) {
453     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
454   }
455   else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported");
456   return 0;
457 }
458 
459 static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
460 {
461   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
462   int          ierr, format;
463   PetscObject  vobj = (PetscObject) viewer;
464   FILE         *fd;
465 
466   ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
467   if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
468     ierr = ViewerFileGetFormat_Private(viewer,&format);
469     if (format == FILE_FORMAT_INFO_DETAILED) {
470       int nz, nzalloc, mem, rank;
471       MPI_Comm_rank(mat->comm,&rank);
472       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
473       MPIU_Seq_begin(mat->comm,1);
474         fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",
475             rank,mdn->m,nz,nzalloc,mem);
476       fflush(fd);
477       MPIU_Seq_end(mat->comm,1);
478       ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
479       return 0;
480     }
481     else if (format == FILE_FORMAT_INFO) {
482       return 0;
483     }
484   }
485   if (vobj->type == ASCII_FILE_VIEWER) {
486     MPIU_Seq_begin(mat->comm,1);
487     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
488              mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
489     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
490     fflush(fd);
491     MPIU_Seq_end(mat->comm,1);
492   }
493   else {
494     int size = mdn->size, rank = mdn->rank;
495     if (size == 1) {
496       ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
497     }
498     else {
499       /* assemble the entire matrix onto first processor. */
500       Mat          A;
501       int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
502       Scalar       *vals;
503       Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;
504 
505       if (!rank) {
506         ierr = MatCreateMPIDense(mat->comm,M,M,N,N,0,&A); CHKERRQ(ierr);
507       }
508       else {
509         ierr = MatCreateMPIDense(mat->comm,0,M,N,N,0,&A); CHKERRQ(ierr);
510       }
511       PLogObjectParent(mat,A);
512 
513       /* Copy the matrix ... This isn't the most efficient means,
514          but it's quick for now */
515       row = mdn->rstart; m = Amdn->m;
516       for ( i=0; i<m; i++ ) {
517         ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
518         ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
519         ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
520         row++;
521       }
522 
523       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
524       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
525       if (!rank) {
526         ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
527       }
528       ierr = MatDestroy(A); CHKERRQ(ierr);
529     }
530   }
531   return 0;
532 }
533 
534 static int MatView_MPIDense(PetscObject obj,Viewer viewer)
535 {
536   Mat          mat = (Mat) obj;
537   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
538   PetscObject  vobj = (PetscObject) viewer;
539   int          ierr;
540 
541   if (!mdn->assembled) SETERRQ(1,"MatView_MPIDense:must assemble matrix");
542   if (!viewer) {
543     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
544   }
545   if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
546     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
547   }
548   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
549     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
550   }
551   else if (vobj->type == BINARY_FILE_VIEWER) {
552     return MatView_MPIDense_Binary(mat,viewer);
553   }
554   return 0;
555 }
556 
557 static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,int *nz,
558                              int *nzalloc,int *mem)
559 {
560   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
561   Mat          mdn = mat->A;
562   int          ierr, isend[3], irecv[3];
563 
564   ierr = MatGetInfo(mdn,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
565   if (flag == MAT_LOCAL) {
566     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
567   } else if (flag == MAT_GLOBAL_MAX) {
568     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,A->comm);
569     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
570   } else if (flag == MAT_GLOBAL_SUM) {
571     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,A->comm);
572     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
573   }
574   return 0;
575 }
576 
577 static int MatSetOption_MPIDense(Mat A,MatOption op)
578 {
579   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
580 
581   if (op == NO_NEW_NONZERO_LOCATIONS ||
582       op == YES_NEW_NONZERO_LOCATIONS ||
583       op == COLUMNS_SORTED ||
584       op == ROW_ORIENTED) {
585         MatSetOption(a->A,op);
586   }
587   else if (op == ROWS_SORTED ||
588            op == SYMMETRIC_MATRIX ||
589            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
590            op == YES_NEW_DIAGONALS)
591     PLogInfo((PetscObject)A,"Info:MatSetOption_MPIDense:Option ignored\n");
592   else if (op == COLUMN_ORIENTED)
593     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:COLUMN_ORIENTED");}
594   else if (op == NO_NEW_DIAGONALS)
595     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:NO_NEW_DIAGONALS");}
596   else
597     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");}
598   return 0;
599 }
600 
601 static int MatGetSize_MPIDense(Mat A,int *m,int *n)
602 {
603   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
604   *m = mat->M; *n = mat->N;
605   return 0;
606 }
607 
608 static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
609 {
610   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
611   *m = mat->m; *n = mat->N;
612   return 0;
613 }
614 
615 static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
616 {
617   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
618   *m = mat->rstart; *n = mat->rend;
619   return 0;
620 }
621 
622 static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
623 {
624   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
625   int          lrow, rstart = mat->rstart, rend = mat->rend;
626 
627   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:only local rows")
628   lrow = row - rstart;
629   return MatGetRow(mat->A,lrow,nz,idx,v);
630 }
631 
632 static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
633 {
634   if (idx) PetscFree(*idx);
635   if (v) PetscFree(*v);
636   return 0;
637 }
638 
/*
   MatNorm_MPIDense - Computes the Frobenius, 1- (max column sum), or
   infinity- (max row sum) norm, combining per-processor contributions
   with the appropriate MPI reduction.  The local block is stored
   column-major, which the direct-access loops below rely on.
*/
static int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr, i, j;
  double       sum = 0.0;
  Scalar       *v = mat->v;

  if (!mdn->assembled) SETERRQ(1,"MatNorm_MPIDense:Must assemble matrix");
  if (mdn->size == 1) {
    /* uniprocessor: the local block is the whole matrix */
    ierr =  MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of squares of local entries, then global sum and sqrt */
      for (i=0; i<mat->n*mat->m; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,A->comm);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    }
    else if (type == NORM_1) {
      /* per-column absolute sums: local partial sums, global sum, then max */
      double *tmp, *tmp2;
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      PetscMemzero(tmp,2*mdn->N*sizeof(double));
      *norm = 0.0;
      v = mat->v;
      /* column-major storage: consecutive entries run down column j */
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      MPI_Allreduce((void*)tmp,(void*)tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp);
      PLogFlops(mat->n*mat->m);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not split across processors, so the local row norms are
         exact; just take the global max */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
      MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,A->comm);
    }
    else {
      SETERRQ(1,"MatNorm_MPIDense:No support for two norm");
    }
  }
  return 0;
}
693 
694 static int MatTranspose_MPIDense(Mat A,Mat *matout)
695 {
696   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
697   Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
698   Mat          B;
699   int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
700   int          j, i, ierr;
701   Scalar       *v;
702 
703   if (!matout && M != N)
704     SETERRQ(1,"MatTranspose_MPIDense:Supports square matrix only in-place");
705   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,&B); CHKERRQ(ierr);
706 
707   m = Aloc->m; n = Aloc->n; v = Aloc->v;
708   rwork = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(rwork);
709   for ( j=0; j<n; j++ ) {
710     for (i=0; i<m; i++) rwork[i] = rstart + i;
711     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
712     v += m;
713   }
714   PetscFree(rwork);
715   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
716   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
717   if (matout) {
718     *matout = B;
719   } else {
720     /* This isn't really an in-place transpose, but free data struct from a */
721     PetscFree(a->rowners);
722     ierr = MatDestroy(a->A); CHKERRQ(ierr);
723     if (a->lvec) VecDestroy(a->lvec);
724     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
725     PetscFree(a);
726     PetscMemcpy(A,B,sizeof(struct _Mat));
727     PetscHeaderDestroy(B);
728   }
729   return 0;
730 }
731 
static int MatCopyPrivate_MPIDense(Mat,Mat *,int);

/* -------------------------------------------------------------------*/
/* Function table for the MPI dense matrix type; zero slots mark
   operations this implementation does not provide. */
static struct _MatOps MatOps = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,MatRestoreRow_MPIDense,
       MatMult_MPIDense,MatMultAdd_MPIDense,
       MatMultTrans_MPIDense,MatMultTransAdd_MPIDense,
       0,0,
       0,0,0,
       0,0,MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,0,MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
       0,
       0,0,0,0,
       MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,0,
       0,0,0,0,0,MatCopyPrivate_MPIDense,
       0,0,0,0,0,
       0,0,MatGetValues_MPIDense};
755 
756 /*@C
757    MatCreateMPIDense - Creates a parallel matrix in dense format.
758 
759    Input Parameters:
760 .  comm - MPI communicator
761 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
762 .  n - number of local columns (or PETSC_DECIDE to have calculated
763            if N is given)
764 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
765 .  N - number of global columns (or PETSC_DECIDE to have calculated
766            if n is given)
767 .  data - optional location of matrix data.  Set data=PetscNull for PETSc
768    to control all matrix memory allocation.
769 
770    Output Parameter:
771 .  newmat - the matrix
772 
773    Notes:
774    The dense format is fully compatible with standard Fortran 77
775    storage by columns.
776 
777    The data input variable is intended primarily for Fortran programmers
778    who wish to allocate their own matrix memory space.  Most users should
779    set data=PetscNull.
780 
781    The user MUST specify either the local or global matrix dimensions
782    (possibly both).
783 
784    Currently, the only parallel dense matrix decomposition is by rows,
785    so that n=N and each submatrix owns all of the global columns.
786 
787 .keywords: matrix, dense, parallel
788 
789 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
790 @*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i;

/* Note:  for now, this assumes that the user knows what he's doing if
   data is specified above.  */

  *newmat         = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm);
  PLogObjectCreate(mat);
  mat->data       = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy    = MatDestroy_MPIDense;
  mat->view       = MatView_MPIDense;
  mat->factor     = 0;

  a->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  /* determine missing global/local row counts from whichever was given */
  if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}

  /* each row stores all columns */
  /* NOTE(review): if both n and N are PETSC_DECIDE the sizes below are
     undefined -- the commented-out check suggests this was known; verify
     callers always supply at least one of them */
  if (N == PETSC_DECIDE) N = n;
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  /*  if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported"); */
  a->N = N;
  a->M = M;
  a->m = m;
  a->n = n;

  /* build local table of row and column ownerships */
  /* rowners[j]..rowners[j+1] is the global row range owned by proc j */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+
                       sizeof(Mat_MPIDense));
  MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];     /* prefix sum of local row counts */
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];     /* prefix sum of local col counts */
  }

  /* the local block holds all N columns of this processor's m rows */
  ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec      = 0;
  a->Mvctx     = 0;
  a->assembled = 0;

  *newmat = mat;
  return 0;
}
857 
858 static int MatCopyPrivate_MPIDense(Mat A,Mat *newmat,int cpvalues)
859 {
860   Mat          mat;
861   Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
862   int          ierr;
863 
864   if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIDense:Must assemble matrix");
865   *newmat       = 0;
866   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm);
867   PLogObjectCreate(mat);
868   mat->data     = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
869   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
870   mat->destroy  = MatDestroy_MPIDense;
871   mat->view     = MatView_MPIDense;
872   mat->factor   = A->factor;
873 
874   a->m          = oldmat->m;
875   a->n          = oldmat->n;
876   a->M          = oldmat->M;
877   a->N          = oldmat->N;
878 
879   a->assembled  = 1;
880   a->rstart     = oldmat->rstart;
881   a->rend       = oldmat->rend;
882   a->size       = oldmat->size;
883   a->rank       = oldmat->rank;
884   a->insertmode = NOT_SET_VALUES;
885 
886   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
887   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
888   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
889   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
890 
891   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
892   PLogObjectParent(mat,a->lvec);
893   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
894   PLogObjectParent(mat,a->Mvctx);
895   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
896   PLogObjectParent(mat,a->A);
897   *newmat = mat;
898   return 0;
899 }
900 
901 #include "sysio.h"
902 
/*
   MatLoad_MPIDense - Loads a matrix stored in PETSc binary format into a
   parallel dense matrix.

   Input Parameters:
.  bview - binary file viewer; only process 0 reads the file
.  type  - requested matrix type (only MATMPIDENSE creates a matrix here)

   Output Parameter:
.  newmat - the assembled parallel matrix

   Notes:
   The file layout (as read below) is: a 4-int header (cookie, M, N, and
   one more entry that is read but unused here), then M row lengths, then
   all column indices, then all numerical values -- i.e. a sparse
   (AIJ-style) encoding, even though the target matrix is dense.
   Process 0 reads everything and ships each processor its slice with
   MPI_Send; all other ranks receive with MPI_Recv.  The error strings
   say "MPIDenseorMPIRow" because this loader is shared with the MPIRow
   format -- presumably; verify against the callers.
*/
int MatLoad_MPIDense(Viewer bview,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*svals;
  PetscObject  vobj = (PetscObject) bview;
  MPI_Comm     comm = vobj->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)bview)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  /* only process 0 touches the file; it validates the magic cookie */
  if (!rank) {
    ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
    ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:not matrix object");
  }

  /* broadcast global dimensions (header[3] is broadcast but never used) */
  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows: rows are dealt out as evenly as
     possible, with the first M%size processors getting one extra */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  /* turn per-processor counts into a prefix-sum table of row offsets;
     rowners[i] is the first global row owned by processor i */
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors; ourlens and offlens share
     one allocation of 2*(rend-rstart) ints */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    /* rowners doubles as the Scatterv displacement array since it already
       holds the starting row offset of each processor */
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    /* send side arguments are ignored on non-root ranks */
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices*/
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    /* sanity check: the message length must match the sum of row lengths */
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  /* NOTE(review): the subtract-then-restore of offlens around matrix
     creation has no effect for the dense case (the lengths are not passed
     to MatCreateMPIDense) -- presumably a leftover from the shared MPIRow
     loader, where diagonal/off-diagonal counts matter */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  if (type == MATMPIDENSE) {
    ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,0,newmat); CHKERRQ(ierr);
  }
  /* NOTE(review): if type is not MATMPIDENSE nothing assigns *newmat and
     A below is taken from an unset pointer -- callers apparently only pass
     MATMPIDENSE; confirm */
  A = *newmat;
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    /* maxnz-sized buffer is reused for every processor's values */
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values*/
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
1069