#ifndef lint
static char vcid[] = "$Id: mpidense.c,v 1.10 1995/11/03 02:59:17 bsmith Exp bsmith $";
#endif

/*
   Basic functions for parallel dense matrices.
*/

#include "mpidense.h"
#include "vec/vecimpl.h"
#include "inline/spops.h"

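/*
   MatSetValues_MPIDense - Inserts or adds a logically dense block of values.
   Entries for locally owned rows go directly into the local sequential
   dense matrix; entries for rows owned by other processors are stashed
   and communicated during MatAssemblyBegin()/MatAssemblyEnd().
*/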
static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,
                               int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;

  if (mdn->insertmode != NOT_SET_VALUES && mdn->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds");
  }
  mdn->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column");
        if (idxn[j] >= mdn->N)
          SETERRQ(1,"MatSetValues_MPIDense:Column too large");
        ierr = MatSetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j,addv);
        CHKERRQ(ierr);
      }
    }
    else {
      ierr = StashValues_Private(&mdn->stash,idxm[i],n,idxn,v+i*n,addv);
      CHKERRQ(ierr);
    }
  }
  return 0;
}

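/*
   MatAssemblyBegin_MPIDense - Begins communication of the stashed
   off-processor entries.  Each processor counts the entries destined for
   every other processor, global reductions determine how many messages
   each processor will receive and the maximum message length, nonblocking
   receives are posted, and the stash is packed into (row,column,value)
   triples and sent with nonblocking sends.
*/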
static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
  int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int          tag = mat->tag, *owner,*starts,count,ierr;
  InsertMode   addv;
  MPI_Request  *send_waits,*recv_waits;
  Scalar       *rvalues,*svalues;

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  MPI_Allreduce((void *) &mdn->insertmode,(void *) &addv,1,MPI_INT,
                MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) { SETERRQ(1,
    "MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs");
    }
  mdn->insertmode = addv; /* in case this processor had no cache */

  /*  first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<mdn->stash.n; i++ ) {
    idx = mdn->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce((void *) procs,(void *) work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce((void *) nprocs,(void *) work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message consists of (global row, global column, value)
     triples; the indices are stored as Scalars to simplify the
     message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.

       This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar) );
  CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<mdn->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar)  mdn->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar)  mdn->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] =  mdn->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);

  mdn->svalues    = svalues;    mdn->rvalues = rvalues;
  mdn->nsends     = nsends;     mdn->nrecvs = nreceives;
  mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
  mdn->rmax       = nmax;

  return 0;
}
extern int MatSetUpMultiply_MPIDense(Mat);

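/*
   MatAssemblyEnd_MPIDense - Completes the assembly: waits on the posted
   receives, unpacks the (row,column,value) triples into the local matrix,
   waits on the outstanding sends, and then assembles the local sequential
   matrix.  The scatter context used for matrix-vector products is built
   the first time a final assembly is performed.
*/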
static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Status   *send_status,recv_status;
  int          imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col;
  Scalar       *values,val;
  InsertMode   addv = mdn->insertmode;

  /*  wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = mdn->rvalues + 3*imdex*mdn->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PETSCREAL(values[3*i]) - mdn->rstart;
      col = (int) PETSCREAL(values[3*i+1]);
      val = values[3*i+2];
      if (col >= 0 && col < mdn->N) {
        MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
      }
      else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");}
    }
    count--;
  }
  PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues);

  /* wait on sends */
  if (mdn->nsends) {
    send_status = (MPI_Status *) PetscMalloc( mdn->nsends*sizeof(MPI_Status) );
    CHKPTRQ(send_status);
    MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(mdn->send_waits); PetscFree(mdn->svalues);

  mdn->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);

  if (!mdn->assembled && mode == FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
  }
  mdn->assembled = 1;
  return 0;
}

static int MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  return MatZeroEntries(l->A);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   the local matrix mdn->A directly and not through the MatZeroRows()
   routine.
*/
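/*
   MatZeroRows_MPIDense - Each processor ships the global indices of the
   rows it wants zeroed to the processors that own those rows; every
   processor then zeros its locally owned rows (optionally setting the
   diagonal) by calling MatZeroRows() on its local sequential matrix.
*/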
static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIDense:Must assemble matrix");
  ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:   */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /*  wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}

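/*
   MatMult_MPIDense - Computes y = A*x.  The needed entries of x are first
   gathered into the local work vector lvec with a scatter, then the local
   sequential dense matrix (which stores all N columns) is applied.
*/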
static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;
  if (!mdn->assembled)
    SETERRQ(1,"MatMult_MPIDense:Must assemble matrix first");
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
  CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
  CHKERRQ(ierr);
  ierr = MatMult(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
  return 0;
}

static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;
  if (!mdn->assembled)
    SETERRQ(1,"MatMultAdd_MPIDense:Must assemble matrix first");
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
  CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
  CHKERRQ(ierr);
  ierr = MatMultAdd(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
  return 0;
}

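/*
   MatMultTrans_MPIDense - Computes y = A'*x.  The local transpose product
   is formed in the work vector lvec, and the distributed contributions are
   then accumulated into y with a reverse scatter using ADD_VALUES.
*/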
static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;
  Scalar       zero = 0.0;

  if (!a->assembled) SETERRQ(1,"MatMultTrans_MPIDense:must assemble matrix");
  ierr = VecSet(&zero,yy); CHKERRQ(ierr);
  ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
         (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
         (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  return 0;
}

static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  if (!a->assembled) SETERRQ(1,"MatMultTransAdd_MPIDense:must assemble matrix");
  ierr = VecCopy(yy,zz); CHKERRQ(ierr);
  ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
         (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
         (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  return 0;
}

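/*
   MatGetDiagonal_MPIDense - The local dense array is stored by columns with
   m local rows and N (global) columns, so the diagonal entry for local row i
   (global row and column rstart+i) sits at index rstart*m + i*m + i.
*/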
static int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
  int          ierr, i, n, m = a->m, radd;
  Scalar       *x;

  if (!a->assembled) SETERRQ(1,"MatGetDiagonal_MPIDense:must assemble matrix");
  ierr = VecGetArray(v,&x); CHKERRQ(ierr);
  ierr = VecGetSize(v,&n); CHKERRQ(ierr);
  if (n != a->M) SETERRQ(1,"MatGetDiagonal_MPIDense:Nonconforming mat and vec");
  radd = a->rstart*m;
  for ( i=0; i<m; i++ ) {
    x[i] = aloc->v[radd + i*m + i];
  }
  return 0;
}

static int MatDestroy_MPIDense(PetscObject obj)
{
  Mat          mat = (Mat) obj;
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  PetscFree(mdn->rowners);
  ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
  if (mdn->lvec)   VecDestroy(mdn->lvec);
  if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
  PetscFree(mdn);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}

#include "pinclude/pviewer.h"

static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported");
  return 0;
}

static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format;
  PetscObject  vobj = (PetscObject) viewer;
  FILE         *fd;

  ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
  if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
    ierr = ViewerFileGetFormat_Private(viewer,&format);
    if (format == FILE_FORMAT_INFO_DETAILED) {
      int nz, nzalloc, mem, rank;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
      MPIU_Seq_begin(mat->comm,1);
        fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",
            rank,mdn->m,nz,nzalloc,mem);
      fflush(fd);
      MPIU_Seq_end(mat->comm,1);
      ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
      return 0;
    }
    else if (format == FILE_FORMAT_INFO) {
      return 0;
    }
  }
  if (vobj->type == ASCII_FILE_VIEWER) {
    MPIU_Seq_begin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
             mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    fflush(fd);
    MPIU_Seq_end(mat->comm,1);
  }
  else {
    int size = mdn->size, rank = mdn->rank;
    if (size == 1) {
      ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat          A;
      int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
      Scalar       *vals;
      Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

      if (!rank) {
        ierr = MatCreateMPIDense(mat->comm,M,N,M,N,&A); CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIDense(mat->comm,0,N,M,N,&A); CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* Copy the matrix ... This isn't the most efficient means,
         but it's quick for now */
      row = mdn->rstart; m = Amdn->m;
      for ( i=0; i<m; i++ ) {
        ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
        ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        row++;
      }

      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}


static int MatView_MPIDense(PetscObject obj,Viewer viewer)
{
  Mat          mat = (Mat) obj;
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  PetscObject  vobj = (PetscObject) viewer;
  int          ierr;

  if (!mdn->assembled) SETERRQ(1,"MatView_MPIDense:must assemble matrix");
  if (!viewer) {
    viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
  }
  if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
    ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
    ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->type == BINARY_FILE_VIEWER) {
    return MatView_MPIDense_Binary(mat,viewer);
  }
  return 0;
}

static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,int *nz,
                             int *nzalloc,int *mem)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  Mat          mdn = mat->A;
  int          ierr, isend[3], irecv[3];

  ierr = MatGetInfo(mdn,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
  if (flag == MAT_LOCAL) {
    *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,A->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,A->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  }
  return 0;
}

static int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  if (op == NO_NEW_NONZERO_LOCATIONS ||
      op == YES_NEW_NONZERO_LOCATIONS ||
      op == COLUMNS_SORTED ||
      op == ROW_ORIENTED) {
        MatSetOption(a->A,op);
  }
  else if (op == ROWS_SORTED ||
           op == SYMMETRIC_MATRIX ||
           op == STRUCTURALLY_SYMMETRIC_MATRIX ||
           op == YES_NEW_DIAGONALS)
    PLogInfo((PetscObject)A,"Info:MatSetOption_MPIDense:Option ignored\n");
  else if (op == COLUMN_ORIENTED)
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:COLUMN_ORIENTED");}
  else if (op == NO_NEW_DIAGONALS)
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:NO_NEW_DIAGONALS");}
  else
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");}
  return 0;
}

static int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->m; *n = mat->N;
  return 0;
}

static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  int          lrow, rstart = mat->rstart, rend = mat->rend;

  if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:only local rows");
  lrow = row - rstart;
  return MatGetRow(mat->A,lrow,nz,idx,v);
}

static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  if (idx) PetscFree(*idx);
  if (v) PetscFree(*v);
  return 0;
}

static int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr, i, j;
  double       sum = 0.0;
  Scalar       *v = mat->v;

  if (!mdn->assembled) SETERRQ(1,"MatNorm_MPIDense:Must assemble matrix");
  if (mdn->size == 1) {
    ierr =  MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mat->n*mat->m; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,A->comm);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    }
    else if (type == NORM_1) {
      double *tmp, *tmp2;
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      PetscMemzero(tmp,2*mdn->N*sizeof(double));
      *norm = 0.0;
      v = mat->v;
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      MPI_Allreduce((void*)tmp,(void*)tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp);
      PLogFlops(mat->n*mat->m);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
      MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,A->comm);
    }
    else {
      SETERRQ(1,"MatNorm_MPIDense:No support for two norm");
    }
  }
  return 0;
}

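/*
   MatTranspose_MPIDense - Forms the transpose by inserting each locally
   stored column of A as a global row of the new matrix B; the column
   indices used for that row are the global row numbers rstart ... rstart+m-1.
*/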
static int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
  Mat          B;
  int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
  int          j, i, ierr;
  Scalar       *v;

  if (!matout && M != N)
    SETERRQ(1,"MatTranspose_MPIDense:Supports square matrix only in-place");
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,&B); CHKERRQ(ierr);

  m = Aloc->m; n = Aloc->n; v = Aloc->v;
  rwork = (int *) PetscMalloc((m+1)*sizeof(int)); CHKPTRQ(rwork);
  for ( i=0; i<m; i++ ) rwork[i] = rstart + i;
  for ( j=0; j<n; j++ ) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
    v += m;
  }
  PetscFree(rwork);
  ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose, but free data struct from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

static int MatCopyPrivate_MPIDense(Mat,Mat *,int);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,MatRestoreRow_MPIDense,
       MatMult_MPIDense,MatMultAdd_MPIDense,
       MatMultTrans_MPIDense,MatMultTransAdd_MPIDense,
       0,0,
       0,0,0,
       0,0,MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,0,MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
       0,
       0,0,0,0,
       MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,0,
       0,0,0,0,0,MatCopyPrivate_MPIDense};

/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE, to be calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE, to be calculated
           if N is given)
.  M - number of global rows (or PETSC_DECIDE, to be calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE, to be calculated
           if n is given)

   Output Parameter:
.  newmat - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Currently, the only parallel dense matrix decomposition is by rows,
   so that n=N and each submatrix owns all of the global columns.

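   Example (an illustrative calling sequence; error checking is omitted, and
   comm, M, N, i, j, and value are placeholders supplied by the caller):

      Mat A;
      MatCreateMPIDense(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);
      MatSetValues(A,1,&i,1,&j,&value,INSERT_VALUES);
      MatAssemblyBegin(A,FINAL_ASSEMBLY);
      MatAssemblyEnd(A,FINAL_ASSEMBLY);
      MatDestroy(A);
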
.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i;

  *newmat         = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm);
  PLogObjectCreate(mat);
  mat->data       = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy    = MatDestroy_MPIDense;
  mat->view       = MatView_MPIDense;
  mat->factor     = 0;

  a->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}

  /* each row stores all columns */
  if (N == PETSC_DECIDE) N = n;
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  /*  if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported"); */
  a->N = N;
  a->M = M;
  a->m = m;
  a->n = n;

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+
                       sizeof(Mat_MPIDense));
  MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }

  ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec      = 0;
  a->Mvctx     = 0;
  a->assembled = 0;

  *newmat = mat;
  return 0;
}

static int MatCopyPrivate_MPIDense(Mat A,Mat *newmat,int cpvalues)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
  int          ierr;

  if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIDense:Must assemble matrix");
  *newmat       = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm);
  PLogObjectCreate(mat);
  mat->data     = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy  = MatDestroy_MPIDense;
  mat->view     = MatView_MPIDense;
  mat->factor   = A->factor;

  a->m          = oldmat->m;
  a->n          = oldmat->n;
  a->M          = oldmat->M;
  a->N          = oldmat->N;

  a->assembled  = 1;
  a->rstart     = oldmat->rstart;
  a->rend       = oldmat->rend;
  a->size       = oldmat->size;
  a->rank       = oldmat->rank;
  a->insertmode = NOT_SET_VALUES;

  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);

  ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  return 0;
}

#include "sysio.h"

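/*
   MatLoad_MPIDense - Loads a matrix from a PETSc binary file.  Process 0
   reads the header, the row lengths, and then the column indices and
   numerical values, keeping its own rows and shipping every other processor
   the data for the rows it owns; the entries are inserted with
   MatSetValues() and the matrix is then assembled.
*/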
int MatLoad_MPIDense(Viewer bview,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*svals;
  PetscObject  vobj = (PetscObject) bview;
  MPI_Comm     comm = vobj->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)bview)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
    ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDense:not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices*/
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDense:something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  if (type == MATMPIDENSE) {
    ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,newmat);CHKERRQ(ierr);
  }
  A = *newmat;
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values*/
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDense:something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
1035