xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 18f449ed41991115d803c2e2c8f42c76f04cb975)
1 #ifndef lint
2 static char vcid[] = "$Id: mpidense.c,v 1.11 1995/11/09 22:28:45 bsmith Exp curfman $";
3 #endif
4 
5 /*
6    Basic functions for basic parallel dense matrices.
7 */
8 
9 #include "mpidense.h"
10 #include "vec/vecimpl.h"
11 #include "inline/spops.h"
12 
13 static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,
14                                int *idxn,Scalar *v,InsertMode addv)
15 {
16   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
17   int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;
18 
19   if (mdn->insertmode != NOT_SET_VALUES && mdn->insertmode != addv) {
20     SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds");
21   }
22   mdn->insertmode = addv;
23   for ( i=0; i<m; i++ ) {
24     if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row");
25     if (idxm[i] >= mdn->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large");
26     if (idxm[i] >= rstart && idxm[i] < rend) {
27       row = idxm[i] - rstart;
28       for ( j=0; j<n; j++ ) {
29         if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column");
30         if (idxn[j] >= mdn->N)
31           SETERRQ(1,"MatSetValues_MPIDense:Column too large");
32         ierr = MatSetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j,addv);
33         CHKERRQ(ierr);
34       }
35     }
36     else {
37       ierr = StashValues_Private(&mdn->stash,idxm[i],n,idxn,v+i*n,addv);
38       CHKERRQ(ierr);
39     }
40   }
41   return 0;
42 }
43 
/*
   Begins assembly: routes all stashed off-processor entries to their
   owning processors using nonblocking MPI sends/receives.  Each stashed
   entry travels as a triple (row, col, value); the integer indices are
   stored as Scalars so that one MPIU_SCALAR message carries everything.
   The posted requests and the send/receive buffers are saved in the
   matrix and are completed and freed in MatAssemblyEnd_MPIDense().
*/
static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
  int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int          tag = mat->tag, *owner,*starts,count,ierr;
  InsertMode   addv;
  MPI_Request  *send_waits,*recv_waits;
  Scalar       *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  /* BOR of the modes is (ADD_VALUES|INSERT_VALUES) iff they disagree */
  MPI_Allreduce((void *) &mdn->insertmode,(void *) &addv,1,MPI_INT,
                MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) { SETERRQ(1,
    "MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs");
    }
  mdn->insertmode = addv; /* in case this processor had no cache */

  /*  first count number of contributors to each processor */
  /* nprocs[j] = # of stashed entries bound for proc j; procs[j] = 0/1
     flag "will I send to j"; owner[i] = destination proc of entry i */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<mdn->stash.n; i++ ) {
    idx = mdn->stash.idx[i];
    /* linear search of the row-ownership table for the owner of row idx */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce((void *) procs,(void *) work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];           /* how many messages I will receive */
  MPI_Allreduce((void *) nprocs,(void *) work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];                /* longest message any proc sends me */
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered pairs
     (global index,value) we store the global index as a double
     to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.

       This could be done better.
  */
  /* 3 Scalars per entry: row index, column index, value */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar) );
  CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack each stashed entry as the (row,col,value) triple; note starts[]
     is consumed (incremented) as entries are placed */
  for ( i=0; i<mdn->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar)  mdn->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar)  mdn->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] =  mdn->stash.array[i];
  }
  PetscFree(owner);
  /* rebuild starts[], which the packing loop above destroyed */
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);

  /* stash the in-flight communication state for MatAssemblyEnd */
  mdn->svalues    = svalues;    mdn->rvalues = rvalues;
  mdn->nsends     = nsends;     mdn->nrecvs = nreceives;
  mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
  mdn->rmax       = nmax;

  return 0;
}
142 extern int MatSetUpMultiply_MPIDense(Mat);
143 
/*
   Completes assembly: waits for the messages posted in
   MatAssemblyBegin_MPIDense(), unpacks each received (row,col,value)
   triple into the local matrix, frees the communication buffers, and
   then assembles the underlying sequential matrix.  On the first
   FINAL_ASSEMBLY the matrix-vector scatter context is also built.
*/
static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Status   *send_status,recv_status;
  int          imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col;
  Scalar       *values,val;
  InsertMode   addv = mdn->insertmode;

  /*  wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = mdn->rvalues + 3*imdex*mdn->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;    /* 3 Scalars per entry -> n (row,col,value) triples */
    for ( i=0; i<n; i++ ) {
      /* indices were shipped as Scalars; convert back and localize row */
      row = (int) PETSCREAL(values[3*i]) - mdn->rstart;
      col = (int) PETSCREAL(values[3*i+1]);
      val = values[3*i+2];
      if (col >= 0 && col < mdn->N) {
        MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
      }
      else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");}
    }
    count--;
  }
  PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues);

  /* wait on sends */
  if (mdn->nsends) {
    send_status = (MPI_Status *) PetscMalloc( mdn->nsends*sizeof(MPI_Status) );
    CHKPTRQ(send_status);
    MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(mdn->send_waits); PetscFree(mdn->svalues);

  mdn->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);

  /* first final assembly: build the scatter used by MatMult et al. */
  if (!mdn->assembled && mode == FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
  }
  mdn->assembled = 1;
  return 0;
}
191 
192 static int MatZeroEntries_MPIDense(Mat A)
193 {
194   Mat_MPIDense *l = (Mat_MPIDense *) A->data;
195   return MatZeroEntries(l->A);
196 }
197 
198 /* the code does not do the diagonal entries correctly unless the
199    matrix is square and the column and row ownerships are identical.
200    This is a BUG. The only way to fix it seems to be to access
201    mdn->A and mdn->B directly and not through the MatZeroRows()
202    routine.
203 */
/*
   Zeros the global rows listed in is (setting diag on the diagonal if
   diag is non-null, via the local MatZeroRows).  The row indices may
   name rows owned by any processor, so each processor first forwards
   every index to the row's owner with nonblocking MPI messages, then
   zeros the rows it received for its own local block.
*/
static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIDense:Must assemble matrix");
  ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  /* nprocs[j] = # of my indices owned by proc j; procs[j] = 0/1 flag;
     owner[i] = owning processor of rows[i] */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];              /* # of messages I will receive */
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];                /* longest message I will receive */
  PetscFree(work);

  /* post receives:   */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack indices grouped by destination; starts[] is consumed here */
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  /* rebuild starts[], destroyed by the packing loop above */
  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];      /* first global row owned locally */

  /*  wait on receives */
  /* lens[i] = length of i-th received message, source[i] = its sender;
     slen accumulates the total number of rows to zero locally */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  /* gather all received global indices, converted to local numbering */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}
326 
327 static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
328 {
329   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
330   int          ierr;
331   if (!mdn->assembled)
332     SETERRQ(1,"MatMult_MPIDense:Must assemble matrix first");
333   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
334   CHKERRQ(ierr);
335   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
336   CHKERRQ(ierr);
337   ierr = MatMult(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
338   return 0;
339 }
340 
341 static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
342 {
343   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
344   int          ierr;
345   if (!mdn->assembled)
346     SETERRQ(1,"MatMultAdd_MPIDense:Must assemble matrix first");
347   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
348   CHKERRQ(ierr);
349   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
350   CHKERRQ(ierr);
351   ierr = MatMultAdd(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
352   return 0;
353 }
354 
355 static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
356 {
357   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
358   int          ierr;
359   Scalar       zero = 0.0;
360 
361   if (!a->assembled) SETERRQ(1,"MatMulTrans_MPIDense:must assemble matrix");
362   ierr = VecSet(&zero,yy); CHKERRQ(ierr);
363   ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr);
364   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
365          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
366   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
367          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
368   return 0;
369 }
370 
371 static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
372 {
373   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
374   int          ierr;
375 
376   if (!a->assembled) SETERRQ(1,"MatMulTransAdd_MPIDense:must assemble matrix");
377   ierr = VecCopy(yy,zz); CHKERRQ(ierr);
378   ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr);
379   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
380          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
381   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
382          (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
383   return 0;
384 }
385 
386 static int MatGetDiagonal_MPIDense(Mat A,Vec v)
387 {
388   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
389   Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
390   int          ierr, i, n, m = a->m, radd;
391   Scalar       *x;
392 
393   if (!a->assembled) SETERRQ(1,"MatGetDiag_MPIDense:must assemble matrix");
394   ierr = VecGetArray(v,&x); CHKERRQ(ierr);
395   ierr = VecGetSize(v,&n); CHKERRQ(ierr);
396   if (n != a->M) SETERRQ(1,"MatGetDiagonal_SeqDense:Nonconforming mat and vec");
397   radd = a->rstart*m*m;
398   for ( i=0; i<m; i++ ) {
399     x[i] = aloc->v[radd + i*m + i];
400   }
401   return 0;
402 }
403 
404 static int MatDestroy_MPIDense(PetscObject obj)
405 {
406   Mat          mat = (Mat) obj;
407   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
408   int          ierr;
409 
410 #if defined(PETSC_LOG)
411   PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N);
412 #endif
413   PetscFree(mdn->rowners);
414   ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
415   if (mdn->lvec)   VecDestroy(mdn->lvec);
416   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
417   PetscFree(mdn);
418   PLogObjectDestroy(mat);
419   PetscHeaderDestroy(mat);
420   return 0;
421 }
422 
423 #include "pinclude/pviewer.h"
424 
425 static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
426 {
427   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
428   int          ierr;
429   if (mdn->size == 1) {
430     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
431   }
432   else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported");
433   return 0;
434 }
435 
/*
   ASCII viewer for the parallel dense matrix.  Handles the INFO and
   INFO_DETAILED formats specially; otherwise either each processor
   prints its local block in rank order (ASCII_FILE_VIEWER), or the
   whole matrix is first assembled onto processor 0 and printed there.
*/
static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format;
  PetscObject  vobj = (PetscObject) viewer;
  FILE         *fd;

  ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
  if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
    ierr = ViewerFileGetFormat_Private(viewer,&format);
    if (format == FILE_FORMAT_INFO_DETAILED) {
      /* one line of local size/storage info per processor, in rank order */
      int nz, nzalloc, mem, rank;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
      MPIU_Seq_begin(mat->comm,1);
        fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",
            rank,mdn->m,nz,nzalloc,mem);
      fflush(fd);
      MPIU_Seq_end(mat->comm,1);
      ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
      return 0;
    }
    else if (format == FILE_FORMAT_INFO) {
      /* nothing to print for the plain INFO format */
      return 0;
    }
  }
  if (vobj->type == ASCII_FILE_VIEWER) {
    /* each processor prints its own block, serialized by rank */
    MPIU_Seq_begin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
             mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    fflush(fd);
    MPIU_Seq_end(mat->comm,1);
  }
  else {
    int size = mdn->size, rank = mdn->rank;
    if (size == 1) {
      ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat          A;
      int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
      Scalar       *vals;
      Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

      /* NOTE(review): MatCreateMPIDense takes (comm,m,n,M,N,data,..);
         these calls pass (M,M,N,N)/(0,M,N,N), which makes the global
         size N x N and looks correct only when M == N -- verify the
         argument order against the intended "all rows on proc 0" */
      if (!rank) {
        ierr = MatCreateMPIDense(mat->comm,M,M,N,N,0,&A); CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIDense(mat->comm,0,M,N,N,0,&A); CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* Copy the matrix ... This isn't the most efficient means,
         but it's quick for now */
      row = mdn->rstart; m = Amdn->m;
      for ( i=0; i<m; i++ ) {
        ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
        ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        row++;
      }

      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      /* all data now lives on proc 0; print its local block there */
      if (!rank) {
        ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}
510 
511 static int MatView_MPIDense(PetscObject obj,Viewer viewer)
512 {
513   Mat          mat = (Mat) obj;
514   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
515   PetscObject  vobj = (PetscObject) viewer;
516   int          ierr;
517 
518   if (!mdn->assembled) SETERRQ(1,"MatView_MPIDense:must assemble matrix");
519   if (!viewer) {
520     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
521   }
522   if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
523     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
524   }
525   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
526     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
527   }
528   else if (vobj->type == BINARY_FILE_VIEWER) {
529     return MatView_MPIDense_Binary(mat,viewer);
530   }
531   return 0;
532 }
533 
534 static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,int *nz,
535                              int *nzalloc,int *mem)
536 {
537   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
538   Mat          mdn = mat->A;
539   int          ierr, isend[3], irecv[3];
540 
541   ierr = MatGetInfo(mdn,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
542   if (flag == MAT_LOCAL) {
543     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
544   } else if (flag == MAT_GLOBAL_MAX) {
545     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,A->comm);
546     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
547   } else if (flag == MAT_GLOBAL_SUM) {
548     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,A->comm);
549     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
550   }
551   return 0;
552 }
553 
554 static int MatSetOption_MPIDense(Mat A,MatOption op)
555 {
556   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
557 
558   if (op == NO_NEW_NONZERO_LOCATIONS ||
559       op == YES_NEW_NONZERO_LOCATIONS ||
560       op == COLUMNS_SORTED ||
561       op == ROW_ORIENTED) {
562         MatSetOption(a->A,op);
563   }
564   else if (op == ROWS_SORTED ||
565            op == SYMMETRIC_MATRIX ||
566            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
567            op == YES_NEW_DIAGONALS)
568     PLogInfo((PetscObject)A,"Info:MatSetOption_MPIDense:Option ignored\n");
569   else if (op == COLUMN_ORIENTED)
570     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:COLUMN_ORIENTED");}
571   else if (op == NO_NEW_DIAGONALS)
572     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:NO_NEW_DIAGONALS");}
573   else
574     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");}
575   return 0;
576 }
577 
578 static int MatGetSize_MPIDense(Mat A,int *m,int *n)
579 {
580   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
581   *m = mat->M; *n = mat->N;
582   return 0;
583 }
584 
585 static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
586 {
587   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
588   *m = mat->m; *n = mat->N;
589   return 0;
590 }
591 
592 static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
593 {
594   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
595   *m = mat->rstart; *n = mat->rend;
596   return 0;
597 }
598 
599 static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
600 {
601   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
602   int          lrow, rstart = mat->rstart, rend = mat->rend;
603 
604   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:only local rows")
605   lrow = row - rstart;
606   return MatGetRow(mat->A,lrow,nz,idx,v);
607 }
608 
609 static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
610 {
611   if (idx) PetscFree(*idx);
612   if (v) PetscFree(*v);
613   return 0;
614 }
615 
/*
   Computes a norm of the parallel dense matrix.  Supported:
     NORM_FROBENIUS - local sums of squares are summed across procs;
     NORM_1         - max column sum: local column sums are summed
                      across procs (each proc holds full rows), then
                      the maximum over columns is taken;
     NORM_INFINITY  - max row sum: local row-norm maxima are reduced
                      with MPI_MAX.
   The two norm is not supported.
*/
static int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr, i, j;
  double       sum = 0.0;
  Scalar       *v = mat->v;

  if (!mdn->assembled) SETERRQ(1,"MatNorm_MPIDense:Must assemble matrix");
  if (mdn->size == 1) {
    /* uniprocessor: the local matrix is the whole matrix */
    ierr =  MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mat->n*mat->m; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,A->comm);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    }
    else if (type == NORM_1) {
      double *tmp, *tmp2;
      /* tmp[j] = local absolute column sum, tmp2 = global column sums */
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      PetscMemzero(tmp,2*mdn->N*sizeof(double));
      *norm = 0.0;
      v = mat->v;
      /* local matrix is stored by columns: walk column j over all rows */
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      MPI_Allreduce((void*)tmp,(void*)tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp);
      PLogFlops(mat->n*mat->m);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
      MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,A->comm);
    }
    else {
      SETERRQ(1,"MatNorm_MPIDense:No support for two norm");
    }
  }
  return 0;
}
670 
671 static int MatTranspose_MPIDense(Mat A,Mat *matout)
672 {
673   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
674   Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
675   Mat          B;
676   int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
677   int          j, i, ierr;
678   Scalar       *v;
679 
680   if (!matout && M != N)
681     SETERRQ(1,"MatTranspose_MPIDense:Supports square matrix only in-place");
682   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,&B); CHKERRQ(ierr);
683 
684   m = Aloc->m; n = Aloc->n; v = Aloc->v;
685   rwork = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(rwork);
686   for ( j=0; j<n; j++ ) {
687     for (i=0; i<m; i++) rwork[i] = rstart + i;
688     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
689     v += m;
690   }
691   PetscFree(rwork);
692   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
693   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
694   if (matout) {
695     *matout = B;
696   } else {
697     /* This isn't really an in-place transpose, but free data struct from a */
698     PetscFree(a->rowners);
699     ierr = MatDestroy(a->A); CHKERRQ(ierr);
700     if (a->lvec) VecDestroy(a->lvec);
701     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
702     PetscFree(a);
703     PetscMemcpy(A,B,sizeof(struct _Mat));
704     PetscHeaderDestroy(B);
705   }
706   return 0;
707 }
708 
709 static int MatCopyPrivate_MPIDense(Mat,Mat *,int);
710 
711 /* -------------------------------------------------------------------*/
/* Function (dispatch) table for MATMPIDENSE matrices.  Entries must
   stay in the slot order declared by struct _MatOps; a 0 marks an
   operation this matrix type does not support. */
static struct _MatOps MatOps = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,MatRestoreRow_MPIDense,
       MatMult_MPIDense,MatMultAdd_MPIDense,
       MatMultTrans_MPIDense,MatMultTransAdd_MPIDense,
       0,0,
       0,0,0,
       0,0,MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,0,MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
       0,
       0,0,0,0,
       MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,0,
       0,0,0,0,0,MatCopyPrivate_MPIDense};
730 
731 /*@C
732    MatCreateMPIDense - Creates a parallel matrix in dense format.
733 
734    Input Parameters:
735 .  comm - MPI communicator
736 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
737 .  n - number of local columns (or PETSC_DECIDE to have calculated
738            if N is given)
739 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
740 .  N - number of global columns (or PETSC_DECIDE to have calculated
741            if n is given)
742 .  data - optional location of matrix data.  Set data=0 for PETSc to
743    control all matrix memory allocation.
744 
745    Output Parameter:
746 .  newmat - the matrix
747 
748    Notes:
749    The dense format is fully compatible with standard Fortran 77
750    storage by columns.
751 
752    The data input variable is intended primarily for Fortran programmers
753    who wish to allocate their own matrix memory space.  Most users should
754    set data=0.
755 
756    The user MUST specify either the local or global matrix dimensions
757    (possibly both).
758 
759    Currently, the only parallel dense matrix decomposition is by rows,
760    so that n=N and each submatrix owns all of the global columns.
761 
762 .keywords: matrix, dense, parallel
763 
764 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
765 @*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i;

/* Note:  for now, this assumes that the user knows what he's doing if
   data is specified above.  */

  *newmat         = 0;
  /* create the Mat header and install the MPIDense function table */
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm);
  PLogObjectCreate(mat);
  mat->data       = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy    = MatDestroy_MPIDense;
  mat->view       = MatView_MPIDense;
  mat->factor     = 0;

  a->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  /* fill in whichever of the local/global row sizes was PETSC_DECIDE:
     global = sum of locals; local = near-even split of the global */
  if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}

  /* each row stores all columns */
  if (N == PETSC_DECIDE) N = n;
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  /*  if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported"); */
  a->N = N;
  a->M = M;
  a->m = m;
  a->n = n;

  /* build local table of row and column ownerships */
  /* rowners[i]..rowners[i+1] is the row range of proc i (prefix sums
     of the gathered local row counts); likewise cowners for columns */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+
                       sizeof(Mat_MPIDense));
  MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }

  /* the local block is m x N: all global columns of the local rows */
  ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec      = 0;
  a->Mvctx     = 0;
  a->assembled = 0;

  *newmat = mat;
  return 0;
}
832 
/*
   Duplicates an (assembled) parallel dense matrix: builds a new header
   with the same function table, copies the sizes and ownership table,
   duplicates the multiply work vector and scatter context, and converts
   the local matrix.  cpvalues is currently unused here; the local data
   is copied via MatConvert with MATSAME.
*/
static int MatCopyPrivate_MPIDense(Mat A,Mat *newmat,int cpvalues)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
  int          ierr;

  if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIDense:Must assemble matrix");
  *newmat       = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm);
  PLogObjectCreate(mat);
  mat->data     = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy  = MatDestroy_MPIDense;
  mat->view     = MatView_MPIDense;
  mat->factor   = A->factor;

  /* copy the scalar size/ownership state of the original */
  a->m          = oldmat->m;
  a->n          = oldmat->n;
  a->M          = oldmat->M;
  a->N          = oldmat->N;

  a->assembled  = 1;
  a->rstart     = oldmat->rstart;
  a->rend       = oldmat->rend;
  a->size       = oldmat->size;
  a->rank       = oldmat->rank;
  a->insertmode = NOT_SET_VALUES;

  /* copy the row-ownership table and start with an empty stash */
  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);

  /* duplicate the multiply machinery and the local matrix */
  ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  return 0;
}
875 
876 #include "sysio.h"
877 
878 int MatLoad_MPIDense(Viewer bview,MatType type,Mat *newmat)
879 {
880   Mat          A;
881   int          i, nz, ierr, j,rstart, rend, fd;
882   Scalar       *vals,*svals;
883   PetscObject  vobj = (PetscObject) bview;
884   MPI_Comm     comm = vobj->comm;
885   MPI_Status   status;
886   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
887   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
888   int          tag = ((PetscObject)bview)->tag;
889 
890   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
891   if (!rank) {
892     ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
893     ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
894     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:not matrix object");
895   }
896 
897   MPI_Bcast(header+1,3,MPI_INT,0,comm);
898   M = header[1]; N = header[2];
899   /* determine ownership of all rows */
900   m = M/size + ((M % size) > rank);
901   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
902   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
903   rowners[0] = 0;
904   for ( i=2; i<=size; i++ ) {
905     rowners[i] += rowners[i-1];
906   }
907   rstart = rowners[rank];
908   rend   = rowners[rank+1];
909 
910   /* distribute row lengths to all processors */
911   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
912   offlens = ourlens + (rend-rstart);
913   if (!rank) {
914     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
915     ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
916     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
917     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
918     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
919     PetscFree(sndcounts);
920   }
921   else {
922     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
923   }
924 
925   if (!rank) {
926     /* calculate the number of nonzeros on each processor */
927     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
928     PetscMemzero(procsnz,size*sizeof(int));
929     for ( i=0; i<size; i++ ) {
930       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
931         procsnz[i] += rowlengths[j];
932       }
933     }
934     PetscFree(rowlengths);
935 
936     /* determine max buffer needed and allocate it */
937     maxnz = 0;
938     for ( i=0; i<size; i++ ) {
939       maxnz = PetscMax(maxnz,procsnz[i]);
940     }
941     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
942 
943     /* read in my part of the matrix column indices  */
944     nz = procsnz[0];
945     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
946     ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);
947 
948     /* read in every one elses and ship off */
949     for ( i=1; i<size; i++ ) {
950       nz = procsnz[i];
951       ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
952       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
953     }
954     PetscFree(cols);
955   }
956   else {
957     /* determine buffer space needed for message */
958     nz = 0;
959     for ( i=0; i<m; i++ ) {
960       nz += ourlens[i];
961     }
962     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
963 
964     /* receive message of column indices*/
965     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
966     MPI_Get_count(&status,MPI_INT,&maxnz);
967     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file");
968   }
969 
970   /* loop over local rows, determining number of off diagonal entries */
971   PetscMemzero(offlens,m*sizeof(int));
972   jj = 0;
973   for ( i=0; i<m; i++ ) {
974     for ( j=0; j<ourlens[i]; j++ ) {
975       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
976       jj++;
977     }
978   }
979 
980   /* create our matrix */
981   for ( i=0; i<m; i++ ) {
982     ourlens[i] -= offlens[i];
983   }
984   if (type == MATMPIDENSE) {
985     ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,0,newmat); CHKERRQ(ierr);
986   }
987   A = *newmat;
988   for ( i=0; i<m; i++ ) {
989     ourlens[i] += offlens[i];
990   }
991 
992   if (!rank) {
993     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
994 
995     /* read in my part of the matrix numerical values  */
996     nz = procsnz[0];
997     ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
998 
999     /* insert into matrix */
1000     jj      = rstart;
1001     smycols = mycols;
1002     svals   = vals;
1003     for ( i=0; i<m; i++ ) {
1004       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1005       smycols += ourlens[i];
1006       svals   += ourlens[i];
1007       jj++;
1008     }
1009 
1010     /* read in other processors and ship out */
1011     for ( i=1; i<size; i++ ) {
1012       nz = procsnz[i];
1013       ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1014       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1015     }
1016     PetscFree(procsnz);
1017   }
1018   else {
1019     /* receive numeric values */
1020     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1021 
1022     /* receive message of values*/
1023     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1024     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1025     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file");
1026 
1027     /* insert into matrix */
1028     jj      = rstart;
1029     smycols = mycols;
1030     svals   = vals;
1031     for ( i=0; i<m; i++ ) {
1032       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1033       smycols += ourlens[i];
1034       svals   += ourlens[i];
1035       jj++;
1036     }
1037   }
1038   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1039 
1040   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1041   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1042   return 0;
1043 }
1044