xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision f09e8eb94a771781a812a8d81a9ca3d36ec35eba)
#ifndef lint
static char vcid[] = "$Id: mpidense.c,v 1.70 1997/05/03 21:11:31 curfman Exp balay $";
#endif

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
  int          roworiented = A->roworiented;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
    if (idxm[i] >= A->M) SETERRQ(1,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr);
      }
      else {
        for ( j=0; j<n; j++ ) {
          if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
          if (idxn[j] >= A->N) SETERRQ(1,0,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
        }
      }
    }
    else {
      if (roworiented) {
        ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr);
      }
      else { /* must stash each separately */
        row = idxm[i];
        for ( j=0; j<n; j++ ) {
          ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    }
  }
  return 0;
}

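/*
   Usage sketch (illustrative only; the helper name is hypothetical):
   inserting a 2x2 block with global indices.  Rows owned by other
   processors are stashed and communicated during assembly.
*/
#if 0
static int MatSetValuesExample(Mat mat)
{
  int    ierr, rows[2] = {0,1}, cols[2] = {0,1};
  Scalar v[4] = {1.0,2.0,3.0,4.0}; /* row-oriented: v[i*n+j] -> (rows[i],cols[j]) */

  ierr = MatSetValues(mat,2,rows,2,cols,v,INSERT_VALUES); CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
#endif
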
#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(1,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
        if (idxn[j] >= mdn->N) SETERRQ(1,0,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr);
      }
    }
    else {
      SETERRQ(1,0,"Only local values currently supported");
    }
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  ierr = MatGetArray(a->A,array); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  return 0;
}

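/*
   Usage sketch (illustrative only; the helper name is hypothetical):
   direct access to the local dense array.  The local block is stored
   by columns (Fortran order) with leading dimension equal to the
   number of local rows.
*/
#if 0
static int MatGetArrayExample(Mat mat,int mlocal,int N)
{
  int    ierr, i, j;
  Scalar *array;

  ierr = MatGetArray(mat,&array); CHKERRQ(ierr);
  for ( j=0; j<N; j++ ) {
    for ( i=0; i<mlocal; i++ ) {
      array[i + j*mlocal] = 0.0; /* entry (i,j) of the local block */
    }
  }
  ierr = MatRestoreArray(mat,&array); CHKERRQ(ierr);
  return 0;
}
#endif
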
#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
  int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int          tag = mat->tag, *owner,*starts,count,ierr;
  InsertMode   addv;
  MPI_Request  *send_waits,*recv_waits;
  Scalar       *rvalues,*svalues;

  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
  MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,0,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<mdn->stash.n; i++ ) {
    idx = mdn->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  if (nreceives > size) SETERRQ(1,0,"Internal PETSc error");
  MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message consists of ordered triples
     (global row, global column, value); we store the indices as Scalars
     to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.

       This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<mdn->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar)  mdn->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar)  mdn->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] =  mdn->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(mat,"MatAssemblyBegin_MPIDense:Number of off-processor values %d\n",mdn->stash.n);
  ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);

  mdn->svalues    = svalues;    mdn->rvalues = rvalues;
  mdn->nsends     = nsends;     mdn->nrecvs = nreceives;
  mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
  mdn->rmax       = nmax;

  return 0;
}
extern int MatSetUpMultiply_MPIDense(Mat);

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Status   *send_status,recv_status;
  int          imdex, nrecvs = mdn->nrecvs, count = nrecvs, i, n, ierr, row, col;
  Scalar       *values,val;
  InsertMode   addv = mat->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = mdn->rvalues + 3*imdex*mdn->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - mdn->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= 0 && col < mdn->N) {
        MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
      }
      else {SETERRQ(1,0,"Invalid column");}
    }
    count--;
  }
  PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues);

  /* wait on sends */
  if (mdn->nsends) {
    send_status = (MPI_Status *) PetscMalloc(mdn->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(mdn->send_waits); PetscFree(mdn->svalues);

  ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
  }
  return 0;
}

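/*
   Usage sketch (illustrative only; the helper name is hypothetical):
   the standard two-phase assembly.  MatAssemblyBegin() posts the
   sends/receives for stashed off-processor values; MatAssemblyEnd()
   unpacks them into the local block and, on the first final assembly,
   builds the scatter context used by MatMult().
*/
#if 0
static int MatAssemblyExample(Mat mat)
{
  int ierr;

  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  /* other computation may be overlapped with the communication here */
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
#endif
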
#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  return MatZeroEntries(l->A);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  *bs = 1;
  return 0;
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   the local matrix directly and not go through the MatZeroRows()
   routine.
*/
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner);
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}

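/*
   Usage sketch (illustrative only; the helper name is hypothetical):
   zeroing a set of global rows, placing 1.0 on the diagonal (pass
   PETSC_NULL as the diagonal to leave the rows entirely zero).  Note
   the diagonal caveat in the comment above MatZeroRows_MPIDense().
*/
#if 0
static int MatZeroRowsExample(Mat mat,int nrows,int *grows)
{
  int    ierr;
  IS     is;
  Scalar one = 1.0;

  ierr = ISCreateGeneral(PETSC_COMM_SELF,nrows,grows,&is); CHKERRQ(ierr);
  ierr = MatZeroRows(mat,is,&one); CHKERRQ(ierr);
  ierr = ISDestroy(is); CHKERRQ(ierr);
  return 0;
}
#endif
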
#undef __FUNC__
#define __FUNC__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIDense"
int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;
  Scalar       zero = 0.0;

  ierr = VecSet(&zero,yy); CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIDense"
int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  ierr = VecCopy(yy,zz); CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

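/*
   Usage sketch (illustrative only; the helper name is hypothetical):
   matrix-vector products with an assembled M x N MPIDense matrix; xx
   has global length N and yy, zz global length M.
*/
#if 0
static int MatMultExample(Mat mat,Vec xx,Vec yy,Vec zz)
{
  int ierr;

  ierr = MatMult(mat,xx,yy); CHKERRQ(ierr);       /* yy = A x      */
  ierr = MatMultAdd(mat,xx,yy,zz); CHKERRQ(ierr); /* zz = A x + yy */
  ierr = MatMultTrans(mat,yy,xx); CHKERRQ(ierr);  /* xx = A' y     */
  return 0;
}
#endif
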
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
  int          ierr, len, i, n, m = a->m, radd;
  Scalar       *x, zero = 0.0;

  ierr = VecSet(&zero,v); CHKERRQ(ierr);
  ierr = VecGetArray(v,&x); CHKERRQ(ierr);
  ierr = VecGetSize(v,&n); CHKERRQ(ierr);
  if (n != a->M) SETERRQ(1,0,"Nonconforming mat and vec");
  len = PetscMin(aloc->m,aloc->n);
  radd = a->rstart*m;
  for ( i=0; i<len; i++ ) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(PetscObject obj)
{
  Mat          mat = (Mat) obj;
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  PetscFree(mdn->rowners);
  ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
  if (mdn->lvec)   VecDestroy(mdn->lvec);
  if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   PetscFree(mdn->factor->temp);
    if (mdn->factor->tag)    PetscFree(mdn->factor->tag);
    if (mdn->factor->pivots) PetscFree(mdn->factor->pivots);
    PetscFree(mdn->factor);
  }
  PetscFree(mdn);
  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}

#include "pinclude/pviewer.h"

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,0,"Only uniprocessor output supported");
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_ASCII"
static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format;
  FILE         *fd;
  ViewerType   vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
  ierr = ViewerGetFormat(viewer,&format); CHKERRQ(ierr);
  if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
    int rank;
    MatInfo info;
    MPI_Comm_rank(mat->comm,&rank);
    ierr = MatGetInfo(mat,MAT_LOCAL,&info); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
      fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
         (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
    ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
    return 0;
  }
  else if (format == VIEWER_FORMAT_ASCII_INFO) {
    return 0;
  }
  if (vtype == ASCII_FILE_VIEWER) {
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
             mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  }
  else {
    int size = mdn->size, rank = mdn->rank;
    if (size == 1) {
      ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto the first processor */
      Mat          A;
      int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
      Scalar       *vals;
      Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

      if (!rank) {
        ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* Copy the matrix ... This isn't the most efficient means,
         but it's quick for now */
      row = mdn->rstart; m = Amdn->m;
      for ( i=0; i<m; i++ ) {
        ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
        ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        row++;
      }

      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense"
int MatView_MPIDense(PetscObject obj,Viewer viewer)
{
  Mat          mat = (Mat) obj;
  int          ierr;
  ViewerType   vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER) {
    ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    ierr = MatView_MPIDense_Binary(mat,viewer); CHKERRQ(ierr);
  }
  return 0;
}

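/*
   Usage sketch (illustrative only; the helper name is hypothetical, and
   the ViewerSetFormat() calling sequence is assumed from this release's
   viewer interface): printing the matrix, then per-processor storage
   information.
*/
#if 0
static int MatViewExample(Mat mat)
{
  int ierr;

  ierr = MatView(mat,VIEWER_STDOUT_WORLD); CHKERRQ(ierr);
  ierr = ViewerSetFormat(VIEWER_STDOUT_WORLD,VIEWER_FORMAT_ASCII_INFO_LONG,PETSC_NULL); CHKERRQ(ierr);
  ierr = MatView(mat,VIEWER_STDOUT_WORLD); CHKERRQ(ierr);
  return 0;
}
#endif
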
#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  Mat          mdn = mat->A;
  int          ierr;
  double       isend[5], irecv[5];

  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  return 0;
}

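/*
   Usage sketch (illustrative only; the helper name is hypothetical):
   querying storage information.  For a dense matrix nz_used counts
   every stored entry, i.e. m*N per processor.
*/
#if 0
static int MatGetInfoExample(Mat mat)
{
  int     ierr;
  MatInfo info;

  ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info); CHKERRQ(ierr);
  PetscPrintf(PETSC_COMM_WORLD,"entries %d memory %d\n",(int)info.nz_used,(int)info.memory);
  return 0;
}
#endif
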
/* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
   extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
   extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
   extern int MatSolve_MPIDense(Mat,Vec,Vec);
   extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
   extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
   extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_NEW_NONZERO_LOCATION_ERROR ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_COLUMNS_UNSORTED) {
    MatSetOption(a->A,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS) {
    PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIDense"
int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIDense"
int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->m; *n = mat->N;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIDense"
int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

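/*
   Usage sketch (illustrative only; the helper name is hypothetical):
   looping over the locally owned rows.  The MPIDense row decomposition
   is contiguous, so every row in [rstart,rend) is local, and each
   processor owns all N columns of its rows.
*/
#if 0
static int OwnershipExample(Mat mat)
{
  int ierr, rstart, rend, row;

  ierr = MatGetOwnershipRange(mat,&rstart,&rend); CHKERRQ(ierr);
  for ( row=rstart; row<rend; row++ ) {
    /* set or read entries of this locally owned row here */
  }
  return 0;
}
#endif
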
#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  int          lrow, rstart = mat->rstart, rend = mat->rend;

  if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows supported");
  lrow = row - rstart;
  return MatGetRow(mat->A,lrow,nz,idx,v);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  if (idx) PetscFree(*idx);
  if (v) PetscFree(*v);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr, i, j;
  double       sum = 0.0;
  Scalar       *v = mat->v;

  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for ( i=0; i<mat->n*mat->m; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    }
    else if (type == NORM_1) { /* max column sum */
      double *tmp, *tmp2;
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      PetscMemzero(tmp,2*mdn->N*sizeof(double));
      *norm = 0.0;
      v = mat->v;
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp);
      PLogFlops(mat->n*mat->m);
    }
    else if (type == NORM_INFINITY) { /* max row sum */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);
    }
    else {
      SETERRQ(1,0,"No support for the two norm");
    }
  }
  return 0;
}

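/*
   Usage sketch (illustrative only; the helper name is hypothetical):
   the supported norms.  NORM_1 is the maximum column sum,
   NORM_INFINITY the maximum row sum; NORM_2 is not available in
   parallel.
*/
#if 0
static int MatNormExample(Mat mat)
{
  int    ierr;
  double fro, one, inf;

  ierr = MatNorm(mat,NORM_FROBENIUS,&fro); CHKERRQ(ierr);
  ierr = MatNorm(mat,NORM_1,&one); CHKERRQ(ierr);
  ierr = MatNorm(mat,NORM_INFINITY,&inf); CHKERRQ(ierr);
  return 0;
}
#endif
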
#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
  Mat          B;
  int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
  int          j, i, ierr;
  Scalar       *v;

  if (matout == PETSC_NULL && M != N) {
    SETERRQ(1,0,"In-place transpose supported only for square matrices");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = Aloc->m; n = Aloc->n; v = Aloc->v;
  rwork = (int *) PetscMalloc(m*sizeof(int)); CHKPTRQ(rwork);
  for ( i=0; i<m; i++ ) rwork[i] = rstart + i;
  for ( j=0; j<n; j++ ) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
    v   += m;
  }
  PetscFree(rwork);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose; we free the data
       structures of a and replace them with those of B */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _p_Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

#include "pinclude/plapack.h"
#undef __FUNC__
#define __FUNC__ "MatScale_MPIDense"
int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
  Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
  int          one = 1, nz;

  nz = a->m*a->n;
  BLscal_( &nz, alpha, a->v, &one );
  PLogFlops(nz);
  return 0;
}

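/*
   Usage sketch (illustrative only; the helper name is hypothetical):
   scaling every entry; note that this release's MatScale() passes the
   scalar by address.
*/
#if 0
static int MatScaleExample(Mat mat)
{
  int    ierr;
  Scalar alpha = 2.0;

  ierr = MatScale(&alpha,mat); CHKERRQ(ierr);
  return 0;
}
#endif
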
static int MatConvertSameType_MPIDense(Mat,Mat *,int);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,MatRestoreRow_MPIDense,
       MatMult_MPIDense,MatMultAdd_MPIDense,
       MatMultTrans_MPIDense,MatMultTransAdd_MPIDense,
/*       MatSolve_MPIDense,0, */
       0,0,
       0,0,
       0,0,
/*       MatLUFactor_MPIDense,0, */
       0,MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,0,MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
       0,0,
/*       0,MatLUFactorSymbolic_MPIDense,MatLUFactorNumeric_MPIDense, */
       0,0,
       MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,0, MatGetArray_MPIDense, MatRestoreArray_MPIDense,
       MatConvertSameType_MPIDense,
       0,0,0,0,0,
       0,0,MatGetValues_MPIDense,0,0,MatScale_MPIDense,
       0,0,0,MatGetBlockSize_MPIDense};

#undef __FUNC__
#define __FUNC__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
.  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Currently, the only parallel dense matrix decomposition is by rows,
   so that n=N and each submatrix owns all of the global columns.

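   Example (illustrative sketch; error checking abbreviated):
$     Mat A;
$     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
$                              M,N,PETSC_NULL,&A);
$     /* ... MatSetValues(), MatAssemblyBegin/End(), MatMult() ... */
$     ierr = MatDestroy(A);
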
.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i,flg;

  /* Note:  For now, when data is specified above, this assumes the user correctly
   allocates the local dense storage space.  We should add error checking. */

  *A = 0;
  PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIDENSE,comm);
  PLogObjectCreate(mat);
  mat->data       = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy    = MatDestroy_MPIDense;
  mat->view       = MatView_MPIDense;
  mat->factor     = 0;
  mat->mapping    = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}

  /* each processor stores all columns of its local rows */
  if (N == PETSC_DECIDE) N = n;
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  /*  if (n != N) SETERRQ(1,0,"For now, only n=N is supported"); */
  a->N = mat->N = N;
  a->M = mat->M = M;
  a->m = mat->m = m;
  a->n = mat->n = n;

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }

  ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off-processor entries */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);

  /* stuff used for matrix-vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = 1;

  *A = mat;
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatConvertSameType_MPIDense"
static int MatConvertSameType_MPIDense(Mat A,Mat *newmat,int cpvalues)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
  int          ierr;
  FactorCtx    *factor;

  *newmat       = 0;
  PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIDENSE,A->comm);
  PLogObjectCreate(mat);
  mat->data      = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIDense;
  mat->view      = MatView_MPIDense;
  mat->factor    = A->factor;
  mat->assembled = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;

  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  return 0;
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
{
  int        *rowners, i,size,rank,m,rstart,rend,ierr,nz,j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);

  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array); CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into the matrix by row (the file is stored by rows, so we
       cannot read directly into the column-major array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);
    }
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);

    /* insert into the matrix by row (the file is stored by rows, so we
       cannot read directly into the column-major array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  PetscFree(rowners);
  PetscFree(vals);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}


#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense"
int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,0,"Not a matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    return MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);
  }

  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"Something is wrong with the file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"Something is wrong with the file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}

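/*
   Usage sketch (illustrative only; the helper name is hypothetical, and
   ViewerFileOpenBinary() is assumed to be this release's binary-viewer
   creation routine): loading an MPIDense matrix from a PETSc binary
   file.
*/
#if 0
static int MatLoadExample(Mat *A)
{
  int    ierr;
  Viewer viewer;

  ierr = ViewerFileOpenBinary(PETSC_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&viewer); CHKERRQ(ierr);
  ierr = MatLoad(viewer,MATMPIDENSE,A); CHKERRQ(ierr);
  ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
  return 0;
}
#endif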