xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 029af93f72d387caa45cf6909ac9aed2d04296ca)
1 #ifndef lint
2 static char vcid[] = "$Id: mpidense.c,v 1.68 1997/03/13 16:33:44 curfman Exp bsmith $";
3 #endif
4 
5 /*
6    Basic functions for parallel dense matrices.
7 */
8 
9 #include "src/mat/impls/dense/mpi/mpidense.h"
10 #include "src/vec/vecimpl.h"
11 
12 #undef __FUNC__
13 #define __FUNC__ "MatSetValues_MPIDense"
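/*
   MatSetValues_MPIDense - Inserts or adds a block of values. Entries whose
   global row lies in this processor's ownership range go directly into the
   local sequential dense matrix; all other entries are stashed and shipped
   to their owners during MatAssemblyBegin()/MatAssemblyEnd(). The values in
   v are read row by row or column by column according to the matrix's
   row-oriented option.
*/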
14 int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
15 {
16   Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
17   int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
18   int          roworiented = A->roworiented;
19 
20   for ( i=0; i<m; i++ ) {
21     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
22     if (idxm[i] >= A->M) SETERRQ(1,0,"Row too large");
23     if (idxm[i] >= rstart && idxm[i] < rend) {
24       row = idxm[i] - rstart;
25       if (roworiented) {
26         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr);
27       }
28       else {
29         for ( j=0; j<n; j++ ) {
30           if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
31           if (idxn[j] >= A->N) SETERRQ(1,0,"Column too large");
32           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
33         }
34       }
35     }
36     else {
37       if (roworiented) {
38         ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr);
39       }
40       else { /* must stash each separately */
41         row = idxm[i];
42         for ( j=0; j<n; j++ ) {
43           ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
44         }
45       }
46     }
47   }
48   return 0;
49 }
50 
51 #undef __FUNC__
52 #define __FUNC__ "MatGetValues_MPIDense"
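/*
   MatGetValues_MPIDense - Retrieves values from the local dense matrix;
   only rows owned by this processor may be requested.
*/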
53 int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
54 {
55   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
56   int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;
57 
58   for ( i=0; i<m; i++ ) {
59     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
60     if (idxm[i] >= mdn->M) SETERRQ(1,0,"Row too large");
61     if (idxm[i] >= rstart && idxm[i] < rend) {
62       row = idxm[i] - rstart;
63       for ( j=0; j<n; j++ ) {
64         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
65         if (idxn[j] >= mdn->N)
66           SETERRQ(1,0,"Column too large");
67         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr);
68       }
69     }
70     else {
71       SETERRQ(1,0,"Only local values currently supported");
72     }
73   }
74   return 0;
75 }
76 
77 #undef __FUNC__
78 #define __FUNC__ "MatGetArray_MPIDense"
79 int MatGetArray_MPIDense(Mat A,Scalar **array)
80 {
81   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
82   int ierr;
83 
84   ierr = MatGetArray(a->A,array); CHKERRQ(ierr);
85   return 0;
86 }
87 
88 #undef __FUNC__
89 #define __FUNC__ "MatRestoreArray_MPIDense"
90 int MatRestoreArray_MPIDense(Mat A,Scalar **array)
91 {
92   return 0;
93 }
94 
95 #undef __FUNC__
96 #define __FUNC__ "MatAssemblyBegin_MPIDense"
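/*
   MatAssemblyBegin_MPIDense - Begins communicating stashed off-processor
   entries to their owners: verifies that all processors use a consistent
   insert mode, counts the stashed entries destined for each processor,
   posts nonblocking receives, and packs and sends the entries as
   (row, column, value) triples.
*/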
97 int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
98 {
99   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
100   MPI_Comm     comm = mat->comm;
101   int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
102   int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
103   int          tag = mat->tag, *owner,*starts,count,ierr;
104   InsertMode   addv;
105   MPI_Request  *send_waits,*recv_waits;
106   Scalar       *rvalues,*svalues;
107 
108   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
109   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
110   if (addv == (ADD_VALUES|INSERT_VALUES)) {
111     SETERRQ(1,0,"Cannot mix adds/inserts on different procs");
112   }
113   mat->insertmode = addv; /* in case this processor had no cache */
114 
115   /*  first count number of contributors to each processor */
116   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
117   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
118   owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
119   for ( i=0; i<mdn->stash.n; i++ ) {
120     idx = mdn->stash.idx[i];
121     for ( j=0; j<size; j++ ) {
122       if (idx >= owners[j] && idx < owners[j+1]) {
123         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
124       }
125     }
126   }
127   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
128 
129   /* inform other processors of number of messages and max length*/
130   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
131   MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);
132   nreceives = work[rank];
133   if (nreceives > size) SETERRQ(1,0,"Internal PETSc error");
134   MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);
135   nmax = work[rank];
136   PetscFree(work);
137 
138   /* post receives:
139        1) each message will consist of triples
140      (global row index, global column index, value); we store the indices
141      as Scalars to simplify the message passing.
142        2) since we don't know how long each individual message is we
143      allocate the largest needed buffer for each receive. Potentially
144      this is a lot of wasted space.
145 
146        This could be done better.
147   */
148   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
149   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
150   for ( i=0; i<nreceives; i++ ) {
151     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
152   }
153 
154   /* do sends:
155       1) starts[i] gives the starting index in svalues for stuff going to
156          the ith processor
157   */
158   svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
159   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
160   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
161   starts[0] = 0;
162   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
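  /* pack each stashed entry as a (row, column, value) triple at its owner's
     current offset; starts[] is used as a running counter here, so it is
     recomputed below before the sends are posted */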
163   for ( i=0; i<mdn->stash.n; i++ ) {
164     svalues[3*starts[owner[i]]]       = (Scalar)  mdn->stash.idx[i];
165     svalues[3*starts[owner[i]]+1]     = (Scalar)  mdn->stash.idy[i];
166     svalues[3*(starts[owner[i]]++)+2] =  mdn->stash.array[i];
167   }
168   PetscFree(owner);
169   starts[0] = 0;
170   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
171   count = 0;
172   for ( i=0; i<size; i++ ) {
173     if (procs[i]) {
174       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);
175     }
176   }
177   PetscFree(starts); PetscFree(nprocs);
178 
179   /* Free cache space */
180   PLogInfo(mat,"MatAssemblyBegin_MPIDense:Number of off-processor values %d\n",mdn->stash.n);
181   ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);
182 
183   mdn->svalues    = svalues;    mdn->rvalues = rvalues;
184   mdn->nsends     = nsends;     mdn->nrecvs = nreceives;
185   mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
186   mdn->rmax       = nmax;
187 
188   return 0;
189 }
190 extern int MatSetUpMultiply_MPIDense(Mat);
191 
192 #undef __FUNC__
193 #define __FUNC__ "MatAssemblyEnd_MPIDense"
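/*
   MatAssemblyEnd_MPIDense - Completes the assembly begun above: waits on
   the receives, unpacks the (row, column, value) triples into the local
   matrix, waits on the sends, assembles the local sequential matrix, and
   on the first final assembly builds the scatter used by MatMult().
*/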
194 int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
195 {
196   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
197   MPI_Status   *send_status,recv_status;
198   int          imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col;
199   Scalar       *values,val;
200   InsertMode   addv = mat->insertmode;
201 
202   /*  wait on receives */
203   while (count) {
204     MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);
205     /* unpack receives into our local space */
206     values = mdn->rvalues + 3*imdex*mdn->rmax;
207     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
208     n = n/3;
209     for ( i=0; i<n; i++ ) {
210       row = (int) PetscReal(values[3*i]) - mdn->rstart;
211       col = (int) PetscReal(values[3*i+1]);
212       val = values[3*i+2];
213       if (col >= 0 && col < mdn->N) {
214         MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
215       }
216       else {SETERRQ(1,0,"Invalid column");}
217     }
218     count--;
219   }
220   PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues);
221 
222   /* wait on sends */
223   if (mdn->nsends) {
224     send_status = (MPI_Status *) PetscMalloc(mdn->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
225     MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);
226     PetscFree(send_status);
227   }
228   PetscFree(mdn->send_waits); PetscFree(mdn->svalues);
229 
230   ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
231   ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);
232 
233   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
234     ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
235   }
236   return 0;
237 }
238 
239 #undef __FUNC__
240 #define __FUNC__ "MatZeroEntries_MPIDense"
241 int MatZeroEntries_MPIDense(Mat A)
242 {
243   Mat_MPIDense *l = (Mat_MPIDense *) A->data;
244   return MatZeroEntries(l->A);
245 }
246 
247 #undef __FUNC__
248 #define __FUNC__ "MatGetBlockSize_MPIDense"
249 int MatGetBlockSize_MPIDense(Mat A,int *bs)
250 {
251   *bs = 1;
252   return 0;
253 }
254 
255 /* the code does not handle the diagonal entries correctly unless the
256    matrix is square and the row and column ownerships are identical.
257    This is a BUG. The only way to fix it seems to be to access
258    mdn->A and mdn->B directly and not through the MatZeroRows()
259    routine.
260 */
261 #undef __FUNC__
262 #define __FUNC__ "MatZeroRows_MPIDense"
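/*
   MatZeroRows_MPIDense - Each processor ships the global row indices it
   holds in the index set to the processors that own those rows (using the
   same nonblocking send/receive pattern as assembly), then zeros its own
   rows by calling MatZeroRows() on the local sequential matrix.
*/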
263 int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
264 {
265   Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
266   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
267   int            *procs,*nprocs,j,found,idx,nsends,*work;
268   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
269   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
270   int            *lens,imdex,*lrows,*values;
271   MPI_Comm       comm = A->comm;
272   MPI_Request    *send_waits,*recv_waits;
273   MPI_Status     recv_status,*send_status;
274   IS             istmp;
275 
276   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
277   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
278 
279   /*  first count number of contributors to each processor */
280   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
281   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
282   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
283   for ( i=0; i<N; i++ ) {
284     idx = rows[i];
285     found = 0;
286     for ( j=0; j<size; j++ ) {
287       if (idx >= owners[j] && idx < owners[j+1]) {
288         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
289       }
290     }
291     if (!found) SETERRQ(1,0,"Index out of range");
292   }
293   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
294 
295   /* inform other processors of number of messages and max length*/
296   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
297   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
298   nrecvs = work[rank];
299   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
300   nmax = work[rank];
301   PetscFree(work);
302 
303   /* post receives:   */
304   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
305   CHKPTRQ(rvalues);
306   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
307   CHKPTRQ(recv_waits);
308   for ( i=0; i<nrecvs; i++ ) {
309     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
310   }
311 
312   /* do sends:
313       1) starts[i] gives the starting index in svalues for stuff going to
314          the ith processor
315   */
316   svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
317   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
318   starts     = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
319   starts[0]  = 0;
320   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
321   for ( i=0; i<N; i++ ) {
322     svalues[starts[owner[i]]++] = rows[i];
323   }
324   ISRestoreIndices(is,&rows);
325 
326   starts[0] = 0;
327   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
328   count = 0;
329   for ( i=0; i<size; i++ ) {
330     if (procs[i]) {
331       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
332     }
333   }
334   PetscFree(starts);
335 
336   base = owners[rank];
337 
338   /*  wait on receives */
339   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
340   source = lens + nrecvs;
341   count  = nrecvs; slen = 0;
342   while (count) {
343     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
344     /* unpack receives into our local space */
345     MPI_Get_count(&recv_status,MPI_INT,&n);
346     source[imdex]  = recv_status.MPI_SOURCE;
347     lens[imdex]  = n;
348     slen += n;
349     count--;
350   }
351   PetscFree(recv_waits);
352 
353   /* move the data into the send scatter */
354   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
355   count = 0;
356   for ( i=0; i<nrecvs; i++ ) {
357     values = rvalues + i*nmax;
358     for ( j=0; j<lens[i]; j++ ) {
359       lrows[count++] = values[j] - base;
360     }
361   }
362   PetscFree(rvalues); PetscFree(lens);
363   PetscFree(owner); PetscFree(nprocs);
364 
365   /* actually zap the local rows */
366   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
367   PLogObjectParent(A,istmp);
368   PetscFree(lrows);
369   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
370   ierr = ISDestroy(istmp); CHKERRQ(ierr);
371 
372   /* wait on sends */
373   if (nsends) {
374     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
375     MPI_Waitall(nsends,send_waits,send_status);
376     PetscFree(send_status);
377   }
378   PetscFree(send_waits); PetscFree(svalues);
379 
380   return 0;
381 }
382 
383 #undef __FUNC__
384 #define __FUNC__ "MatMult_MPIDense"
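/*
   MatMult_MPIDense - Computes yy = mat*xx: scatters the distributed vector
   xx into the local work vector lvec, then applies the local sequential
   dense matrix.
*/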
385 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
386 {
387   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
388   int          ierr;
389 
390   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
391   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
392   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
393   return 0;
394 }
395 
396 #undef __FUNC__
397 #define __FUNC__ "MatMultAdd_MPIDense"
398 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
399 {
400   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
401   int          ierr;
402 
403   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
404   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
405   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
406   return 0;
407 }
408 
409 #undef __FUNC__
410 #define __FUNC__ "MatMultTrans_MPIDense"
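/*
   MatMultTrans_MPIDense - Computes yy = A^T*xx: applies the transpose of
   the local dense matrix to xx, then adds the partial results into yy with
   a reverse scatter.
*/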
411 int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
412 {
413   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
414   int          ierr;
415   Scalar       zero = 0.0;
416 
417   ierr = VecSet(&zero,yy); CHKERRQ(ierr);
418   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
419   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
420   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
421   return 0;
422 }
423 
424 #undef __FUNC__
425 #define __FUNC__ "MatMultTransAdd_MPIDense"
426 int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
427 {
428   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
429   int          ierr;
430 
431   ierr = VecCopy(yy,zz); CHKERRQ(ierr);
432   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
433   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
434   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
435   return 0;
436 }
437 
438 #undef __FUNC__
439 #define __FUNC__ "MatGetDiagonal_MPIDense"
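/*
   MatGetDiagonal_MPIDense - Extracts the diagonal from the column-major
   local array: the diagonal entry for local row i (global row rstart+i)
   sits in local column rstart+i, i.e. at v[rstart*m + i*m + i].
*/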
440 int MatGetDiagonal_MPIDense(Mat A,Vec v)
441 {
442   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
443   Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
444   int          ierr, len, i, n, m = a->m, radd;
445   Scalar       *x, zero = 0.0;
446 
447   VecSet(&zero,v);
448   ierr = VecGetArray(v,&x); CHKERRQ(ierr);
449   ierr = VecGetSize(v,&n); CHKERRQ(ierr);
450   if (n != a->M) SETERRQ(1,0,"Nonconforming mat and vec");
451   len = PetscMin(aloc->m,aloc->n);
452   radd = a->rstart*m;
453   for ( i=0; i<len; i++ ) {
454     x[i] = aloc->v[radd + i*m + i];
455   }
456   return 0;
457 }
458 
459 #undef __FUNC__
460 #define __FUNC__ "MatDestroy_MPIDense"
461 int MatDestroy_MPIDense(PetscObject obj)
462 {
463   Mat          mat = (Mat) obj;
464   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
465   int          ierr;
466 
467 #if defined(PETSC_LOG)
468   PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N);
469 #endif
470   PetscFree(mdn->rowners);
471   ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
472   if (mdn->lvec)   VecDestroy(mdn->lvec);
473   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
474   if (mdn->factor) {
475     if (mdn->factor->temp)   PetscFree(mdn->factor->temp);
476     if (mdn->factor->tag)    PetscFree(mdn->factor->tag);
477     if (mdn->factor->pivots) PetscFree(mdn->factor->pivots);
478     PetscFree(mdn->factor);
479   }
480   PetscFree(mdn);
481   if (mat->mapping) {
482     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
483   }
484   PLogObjectDestroy(mat);
485   PetscHeaderDestroy(mat);
486   return 0;
487 }
488 
489 #include "pinclude/pviewer.h"
490 
491 #undef __FUNC__
492 #define __FUNC__ "MatView_MPIDense_Binary"
493 static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
494 {
495   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
496   int          ierr;
497 
498   if (mdn->size == 1) {
499     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
500   }
501   else SETERRQ(1,0,"Only uniprocessor output supported");
502   return 0;
503 }
504 
505 #undef __FUNC__
506 #define __FUNC__ "MatView_MPIDense_ASCII"
507 static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
508 {
509   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
510   int          ierr, format;
511   FILE         *fd;
512   ViewerType   vtype;
513 
514   ViewerGetType(viewer,&vtype);
515   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
516   ierr = ViewerGetFormat(viewer,&format);
517   if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
518     int rank;
519     MatInfo info;
520     MPI_Comm_rank(mat->comm,&rank);
521     ierr = MatGetInfo(mat,MAT_LOCAL,&info);
522     PetscSequentialPhaseBegin(mat->comm,1);
523       fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
524          (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
525       fflush(fd);
526     PetscSequentialPhaseEnd(mat->comm,1);
527     ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
528     return 0;
529   }
530   else if (format == VIEWER_FORMAT_ASCII_INFO) {
531     return 0;
532   }
533   if (vtype == ASCII_FILE_VIEWER) {
534     PetscSequentialPhaseBegin(mat->comm,1);
535     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
536              mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
537     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
538     fflush(fd);
539     PetscSequentialPhaseEnd(mat->comm,1);
540   }
541   else {
542     int size = mdn->size, rank = mdn->rank;
543     if (size == 1) {
544       ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
545     }
546     else {
547       /* assemble the entire matrix onto first processor. */
548       Mat          A;
549       int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
550       Scalar       *vals;
551       Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;
552 
553       if (!rank) {
554         ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
555       }
556       else {
557         ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
558       }
559       PLogObjectParent(mat,A);
560 
561       /* Copy the matrix ... This isn't the most efficient means,
562          but it's quick for now */
563       row = mdn->rstart; m = Amdn->m;
564       for ( i=0; i<m; i++ ) {
565         ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
566         ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
567         ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
568         row++;
569       }
570 
571       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
572       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
573       if (!rank) {
574         ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
575       }
576       ierr = MatDestroy(A); CHKERRQ(ierr);
577     }
578   }
579   return 0;
580 }
581 
582 #undef __FUNC__
583 #define __FUNC__ "MatView_MPIDense"
584 int MatView_MPIDense(PetscObject obj,Viewer viewer)
585 {
586   Mat          mat = (Mat) obj;
587   int          ierr;
588   ViewerType   vtype;
589 
590   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
591   if (vtype == ASCII_FILE_VIEWER) {
592     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
593   }
594   else if (vtype == ASCII_FILES_VIEWER) {
595     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
596   }
597   else if (vtype == BINARY_FILE_VIEWER) {
598     return MatView_MPIDense_Binary(mat,viewer);
599   }
600   return 0;
601 }
602 
603 #undef __FUNC__
604 #define __FUNC__ "MatGetInfo_MPIDense"
605 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
606 {
607   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
608   Mat          mdn = mat->A;
609   int          ierr;
610   double       isend[5], irecv[5];
611 
612   info->rows_global    = (double)mat->M;
613   info->columns_global = (double)mat->N;
614   info->rows_local     = (double)mat->m;
615   info->columns_local  = (double)mat->N;
616   info->block_size     = 1.0;
617   ierr = MatGetInfo(mdn,MAT_LOCAL,info); CHKERRQ(ierr);
618   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
619   isend[3] = info->memory;  isend[4] = info->mallocs;
620   if (flag == MAT_LOCAL) {
621     info->nz_used      = isend[0];
622     info->nz_allocated = isend[1];
623     info->nz_unneeded  = isend[2];
624     info->memory       = isend[3];
625     info->mallocs      = isend[4];
626   } else if (flag == MAT_GLOBAL_MAX) {
627     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);
628     info->nz_used      = irecv[0];
629     info->nz_allocated = irecv[1];
630     info->nz_unneeded  = irecv[2];
631     info->memory       = irecv[3];
632     info->mallocs      = irecv[4];
633   } else if (flag == MAT_GLOBAL_SUM) {
634     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);
635     info->nz_used      = irecv[0];
636     info->nz_allocated = irecv[1];
637     info->nz_unneeded  = irecv[2];
638     info->memory       = irecv[3];
639     info->mallocs      = irecv[4];
640   }
641   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
642   info->fill_ratio_needed = 0;
643   info->factor_mallocs    = 0;
644   return 0;
645 }
646 
647 /* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
648    extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
649    extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
650    extern int MatSolve_MPIDense(Mat,Vec,Vec);
651    extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
652    extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
653    extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */
654 
655 #undef __FUNC__
656 #define __FUNC__ "MatSetOption_MPIDense"
657 int MatSetOption_MPIDense(Mat A,MatOption op)
658 {
659   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
660 
661   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
662       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
663       op == MAT_NEW_NONZERO_LOCATION_ERROR ||
664       op == MAT_COLUMNS_SORTED ||
665       op == MAT_COLUMNS_UNSORTED) {
666         MatSetOption(a->A,op);
667   } else if (op == MAT_ROW_ORIENTED) {
668         a->roworiented = 1;
669         MatSetOption(a->A,op);
670   } else if (op == MAT_ROWS_SORTED ||
671              op == MAT_ROWS_UNSORTED ||
672              op == MAT_SYMMETRIC ||
673              op == MAT_STRUCTURALLY_SYMMETRIC ||
674              op == MAT_YES_NEW_DIAGONALS)
675     PLogInfo(A,"Info:MatSetOption_MPIDense:Option ignored\n");
676   else if (op == MAT_COLUMN_ORIENTED)
677     {a->roworiented = 0; MatSetOption(a->A,op);}
678   else if (op == MAT_NO_NEW_DIAGONALS)
679     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
680   else
681     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
682   return 0;
683 }
684 
685 #undef __FUNC__
686 #define __FUNC__ "MatGetSize_MPIDense"
687 int MatGetSize_MPIDense(Mat A,int *m,int *n)
688 {
689   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
690   *m = mat->M; *n = mat->N;
691   return 0;
692 }
693 
694 #undef __FUNC__
695 #define __FUNC__ "MatGetLocalSize_MPIDense"
696 int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
697 {
698   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
699   *m = mat->m; *n = mat->N;
700   return 0;
701 }
702 
703 #undef __FUNC__
704 #define __FUNC__ "MatGetOwnershipRange_MPIDense"
705 int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
706 {
707   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
708   *m = mat->rstart; *n = mat->rend;
709   return 0;
710 }
711 
712 #undef __FUNC__
713 #define __FUNC__ "MatGetRow_MPIDense"
714 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
715 {
716   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
717   int          lrow, rstart = mat->rstart, rend = mat->rend;
718 
719   if (row < rstart || row >= rend) SETERRQ(1,0,"only local rows");
720   lrow = row - rstart;
721   return MatGetRow(mat->A,lrow,nz,idx,v);
722 }
723 
724 #undef __FUNC__
725 #define __FUNC__ "MatRestoreRow_MPIDense"
726 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
727 {
728   if (idx) PetscFree(*idx);
729   if (v) PetscFree(*v);
730   return 0;
731 }
732 
733 #undef __FUNC__
734 #define __FUNC__ "MatNorm_MPIDense"
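/*
   MatNorm_MPIDense - Frobenius norm: the squared absolute values of the
   local entries are summed, reduced with MPI_SUM, and the square root
   taken.  1-norm: per-column absolute sums are reduced across processors
   and the maximum column sum is taken.  Infinity norm: the local maximum
   row sum is reduced with MPI_MAX (each processor owns whole rows, so no
   further work is needed).
*/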
735 int MatNorm_MPIDense(Mat A,NormType type,double *norm)
736 {
737   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
738   Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
739   int          ierr, i, j;
740   double       sum = 0.0;
741   Scalar       *v = mat->v;
742 
743   if (mdn->size == 1) {
744     ierr =  MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
745   } else {
746     if (type == NORM_FROBENIUS) {
747       for (i=0; i<mat->n*mat->m; i++ ) {
748 #if defined(PETSC_COMPLEX)
749         sum += real(conj(*v)*(*v)); v++;
750 #else
751         sum += (*v)*(*v); v++;
752 #endif
753       }
754       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);
755       *norm = sqrt(*norm);
756       PLogFlops(2*mat->n*mat->m);
757     }
758     else if (type == NORM_1) {
759       double *tmp, *tmp2;
760       tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
761       tmp2 = tmp + mdn->N;
762       PetscMemzero(tmp,2*mdn->N*sizeof(double));
763       *norm = 0.0;
764       v = mat->v;
765       for ( j=0; j<mat->n; j++ ) {
766         for ( i=0; i<mat->m; i++ ) {
767           tmp[j] += PetscAbsScalar(*v);  v++;
768         }
769       }
770       MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);
771       for ( j=0; j<mdn->N; j++ ) {
772         if (tmp2[j] > *norm) *norm = tmp2[j];
773       }
774       PetscFree(tmp);
775       PLogFlops(mat->n*mat->m);
776     }
777     else if (type == NORM_INFINITY) { /* max row norm */
778       double ntemp;
779       ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
780       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);
781     }
782     else {
783       SETERRQ(1,0,"No support for two norm");
784     }
785   }
786   return 0;
787 }
788 
789 #undef __FUNC__
790 #define __FUNC__ "MatTranspose_MPIDense"
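/*
   MatTranspose_MPIDense - Forms the transpose by looping over the columns
   of the local dense matrix (which holds all N global columns for the
   locally owned rows) and inserting each column as a global row of the new
   matrix; the result is then assembled normally.
*/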
791 int MatTranspose_MPIDense(Mat A,Mat *matout)
792 {
793   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
794   Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
795   Mat          B;
796   int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
797   int          j, i, ierr;
798   Scalar       *v;
799 
800   if (matout == PETSC_NULL && M != N) {
801     SETERRQ(1,0,"Supports square matrix only in-place");
802   }
803   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);
804 
805   m = Aloc->m; n = Aloc->n; v = Aloc->v;
806   rwork = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(rwork);
807   for ( j=0; j<n; j++ ) {
808     for (i=0; i<m; i++) rwork[i] = rstart + i;
809     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
810     v   += m;
811   }
812   PetscFree(rwork);
813   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
814   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
815   if (matout != PETSC_NULL) {
816     *matout = B;
817   } else {
818     /* This isn't really an in-place transpose, but free data struct from a */
819     PetscFree(a->rowners);
820     ierr = MatDestroy(a->A); CHKERRQ(ierr);
821     if (a->lvec) VecDestroy(a->lvec);
822     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
823     PetscFree(a);
824     PetscMemcpy(A,B,sizeof(struct _Mat));
825     PetscHeaderDestroy(B);
826   }
827   return 0;
828 }
829 
830 #include "pinclude/plapack.h"
831 #undef __FUNC__
832 #define __FUNC__ "MatScale_MPIDense"
833 int MatScale_MPIDense(Scalar *alpha,Mat inA)
834 {
835   Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
836   Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
837   int          one = 1, nz;
838 
839   nz = a->m*a->n;
840   BLscal_( &nz, alpha, a->v, &one );
841   PLogFlops(nz);
842   return 0;
843 }
844 
845 static int MatConvertSameType_MPIDense(Mat,Mat *,int);
846 
847 /* -------------------------------------------------------------------*/
848 static struct _MatOps MatOps = {MatSetValues_MPIDense,
849        MatGetRow_MPIDense,MatRestoreRow_MPIDense,
850        MatMult_MPIDense,MatMultAdd_MPIDense,
851        MatMultTrans_MPIDense,MatMultTransAdd_MPIDense,
852 /*       MatSolve_MPIDense,0, */
853        0,0,
854        0,0,
855        0,0,
856 /*       MatLUFactor_MPIDense,0, */
857        0,MatTranspose_MPIDense,
858        MatGetInfo_MPIDense,0,
859        MatGetDiagonal_MPIDense,0,MatNorm_MPIDense,
860        MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
861        0,
862        MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
863        0,0,
864 /*       0,MatLUFactorSymbolic_MPIDense,MatLUFactorNumeric_MPIDense, */
865        0,0,
866        MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
867        MatGetOwnershipRange_MPIDense,
868        0,0, MatGetArray_MPIDense, MatRestoreArray_MPIDense,
869        MatConvertSameType_MPIDense,
870        0,0,0,0,0,
871        0,0,MatGetValues_MPIDense,0,0,MatScale_MPIDense,
872        0,0,0,MatGetBlockSize_MPIDense};
873 
874 #undef __FUNC__
875 #define __FUNC__ "MatCreateMPIDense"
876 /*@C
877    MatCreateMPIDense - Creates a parallel matrix in dense format.
878 
879    Input Parameters:
880 .  comm - MPI communicator
881 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
882 .  n - number of local columns (or PETSC_DECIDE to have it calculated
883            if N is given)
884 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
885 .  N - number of global columns (or PETSC_DECIDE to have it calculated
886            if n is given)
887 .  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
888    to control all matrix memory allocation.
889 
890    Output Parameter:
891 .  A - the matrix
892 
893    Notes:
894    The dense format is fully compatible with standard Fortran 77
895    storage by columns.
896 
897    The data input variable is intended primarily for Fortran programmers
898    who wish to allocate their own matrix memory space.  Most users should
899    set data=PETSC_NULL.
900 
901    The user MUST specify either the local or global matrix dimensions
902    (possibly both).
903 
904    Currently, the only parallel dense matrix decomposition is by rows,
905    so that n=N and each submatrix owns all of the global columns.
906 
907 .keywords: matrix, dense, parallel
908 
909 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
910 @*/
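/*
   Example usage (a minimal sketch; the 4x4 size and the single entry set
   on every process are illustrative only):

      Mat    A;
      int    ierr, row = 0, col = 0;
      Scalar one = 1.0;

      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
                               4,4,PETSC_NULL,&A); CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,1,&col,&one,INSERT_VALUES); CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatDestroy(A); CHKERRQ(ierr);
*/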
911 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
912 {
913   Mat          mat;
914   Mat_MPIDense *a;
915   int          ierr, i,flg;
916 
917   /* Note:  For now, when data is specified above, this assumes the user correctly
918    allocates the local dense storage space.  We should add error checking. */
919 
920   *A = 0;
921   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm);
922   PLogObjectCreate(mat);
923   mat->data       = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
924   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
925   mat->destroy    = MatDestroy_MPIDense;
926   mat->view       = MatView_MPIDense;
927   mat->factor     = 0;
928   mat->mapping    = 0;
929 
930   a->factor       = 0;
931   mat->insertmode = NOT_SET_VALUES;
932   MPI_Comm_rank(comm,&a->rank);
933   MPI_Comm_size(comm,&a->size);
934 
935   if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);
936   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
937 
938   /* each row stores all columns */
939   if (N == PETSC_DECIDE) N = n;
940   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
941   /*  if (n != N) SETERRQ(1,0,"For now, only n=N is supported"); */
942   a->N = mat->N = N;
943   a->M = mat->M = M;
944   a->m = mat->m = m;
945   a->n = mat->n = n;
946 
947   /* build local table of row and column ownerships */
948   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
949   a->cowners = a->rowners + a->size + 1;
950   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
951   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
952   a->rowners[0] = 0;
953   for ( i=2; i<=a->size; i++ ) {
954     a->rowners[i] += a->rowners[i-1];
955   }
956   a->rstart = a->rowners[a->rank];
957   a->rend   = a->rowners[a->rank+1];
958   MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
959   a->cowners[0] = 0;
960   for ( i=2; i<=a->size; i++ ) {
961     a->cowners[i] += a->cowners[i-1];
962   }
963 
964   ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr);
965   PLogObjectParent(mat,a->A);
966 
967   /* build cache for off-processor entries formed during MatSetValues() */
968   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
969 
970   /* stuff used for matrix vector multiply */
971   a->lvec        = 0;
972   a->Mvctx       = 0;
973   a->roworiented = 1;
974 
975   *A = mat;
976   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
977   if (flg) {
978     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
979   }
980   return 0;
981 }
982 
983 #undef __FUNC__
984 #define __FUNC__ "MatConvertSameType_MPIDense"
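/*
   MatConvertSameType_MPIDense - Duplicates a parallel dense matrix: copies
   the sizes, ownership information and row-ownership table, and duplicates
   the local dense matrix, the work vector and the scatter context.
*/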
985 static int MatConvertSameType_MPIDense(Mat A,Mat *newmat,int cpvalues)
986 {
987   Mat          mat;
988   Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
989   int          ierr;
990   FactorCtx    *factor;
991 
992   *newmat       = 0;
993   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm);
994   PLogObjectCreate(mat);
995   mat->data      = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
996   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
997   mat->destroy   = MatDestroy_MPIDense;
998   mat->view      = MatView_MPIDense;
999   mat->factor    = A->factor;
1000   mat->assembled = PETSC_TRUE;
1001 
1002   a->m = mat->m = oldmat->m;
1003   a->n = mat->n = oldmat->n;
1004   a->M = mat->M = oldmat->M;
1005   a->N = mat->N = oldmat->N;
1006   if (oldmat->factor) {
1007     a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor);
1008     /* copy factor contents ... add this code! */
1009   } else a->factor = 0;
1010 
1011   a->rstart       = oldmat->rstart;
1012   a->rend         = oldmat->rend;
1013   a->size         = oldmat->size;
1014   a->rank         = oldmat->rank;
1015   mat->insertmode = NOT_SET_VALUES;
1016 
1017   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
1018   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
1019   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1020   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1021 
1022   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1023   PLogObjectParent(mat,a->lvec);
1024   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1025   PLogObjectParent(mat,a->Mvctx);
1026   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1027   PLogObjectParent(mat,a->A);
1028   *newmat = mat;
1029   return 0;
1030 }
1031 
1032 #include "sys.h"
1033 
1034 #undef __FUNC__
1035 #define __FUNC__ "MatLoad_MPIDense_DenseInFile"
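/*
   MatLoad_MPIDense_DenseInFile - Loads a matrix that is stored in the
   binary file as a dense array of values.  Processor 0 reads each
   processor's block of rows and ships it off; the values arrive row by
   row and are copied into the column-major local array.
*/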
1036 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
1037 {
1038   int        *rowners, i,size,rank,m,rstart,rend,ierr,nz,j;
1039   Scalar     *array,*vals,*vals_ptr;
1040   MPI_Status status;
1041 
1042   MPI_Comm_rank(comm,&rank);
1043   MPI_Comm_size(comm,&size);
1044 
1045   /* determine ownership of all rows */
1046   m = M/size + ((M % size) > rank);
1047   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1048   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1049   rowners[0] = 0;
1050   for ( i=2; i<=size; i++ ) {
1051     rowners[i] += rowners[i-1];
1052   }
1053   rstart = rowners[rank];
1054   rend   = rowners[rank+1];
1055 
1056   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1057   ierr = MatGetArray(*newmat,&array); CHKERRQ(ierr);
1058 
1059   if (!rank) {
1060     vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);
1061 
1062     /* read in my part of the matrix numerical values  */
1063     ierr = PetscBinaryRead(fd,vals,m*N,BINARY_SCALAR); CHKERRQ(ierr);
1064 
1065     /* insert into matrix row by row (this is why we cannot read directly into the array) */
1066     vals_ptr = vals;
1067     for ( i=0; i<m; i++ ) {
1068       for ( j=0; j<N; j++ ) {
1069         array[i + j*m] = *vals_ptr++;
1070       }
1071     }
1072 
1073     /* read in the other processors' parts and ship them out */
1074     for ( i=1; i<size; i++ ) {
1075       nz   = (rowners[i+1] - rowners[i])*N;
1076       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1077       MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);
1078     }
1079   }
1080   else {
1081     /* receive numeric values */
1082     vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);
1083 
1084     /* receive message of values*/
1085     MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);
1086 
1087     /* insert into matrix row by row (this is why we cannot read directly into the array) */
1088     vals_ptr = vals;
1089     for ( i=0; i<m; i++ ) {
1090       for ( j=0; j<N; j++ ) {
1091         array[i + j*m] = *vals_ptr++;
1092       }
1093     }
1094   }
1095   PetscFree(rowners);
1096   PetscFree(vals);
1097   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1098   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1099   return 0;
1100 }
1101 
1102 
1103 #undef __FUNC__
1104 #define __FUNC__ "MatLoad_MPIDense"
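/*
   MatLoad_MPIDense - Loads a parallel dense matrix from a PETSc binary
   viewer.  If the file holds a dense array of values the work is handed to
   MatLoad_MPIDense_DenseInFile(); otherwise processor 0 reads the standard
   sparse binary format (header, row lengths, column indices, values) and
   distributes each processor's share, which is then inserted with
   MatSetValues().
*/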
1105 int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
1106 {
1107   Mat          A;
1108   int          i, nz, ierr, j,rstart, rend, fd;
1109   Scalar       *vals,*svals;
1110   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1111   MPI_Status   status;
1112   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1113   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1114   int          tag = ((PetscObject)viewer)->tag;
1115 
1116   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1117   if (!rank) {
1118     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1119     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1120     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
1121   }
1122 
1123   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1124   M = header[1]; N = header[2]; nz = header[3];
1125 
1126   /*
1127        Handle case where matrix is stored on disk as a dense matrix
1128   */
1129   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1130     return MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);
1131   }
1132 
1133   /* determine ownership of all rows */
1134   m = M/size + ((M % size) > rank);
1135   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1136   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1137   rowners[0] = 0;
1138   for ( i=2; i<=size; i++ ) {
1139     rowners[i] += rowners[i-1];
1140   }
1141   rstart = rowners[rank];
1142   rend   = rowners[rank+1];
1143 
1144   /* distribute row lengths to all processors */
1145   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1146   offlens = ourlens + (rend-rstart);
1147   if (!rank) {
1148     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1149     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1150     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1151     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1152     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1153     PetscFree(sndcounts);
1154   }
1155   else {
1156     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1157   }
1158 
1159   if (!rank) {
1160     /* calculate the number of nonzeros on each processor */
1161     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1162     PetscMemzero(procsnz,size*sizeof(int));
1163     for ( i=0; i<size; i++ ) {
1164       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1165         procsnz[i] += rowlengths[j];
1166       }
1167     }
1168     PetscFree(rowlengths);
1169 
1170     /* determine max buffer needed and allocate it */
1171     maxnz = 0;
1172     for ( i=0; i<size; i++ ) {
1173       maxnz = PetscMax(maxnz,procsnz[i]);
1174     }
1175     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1176 
1177     /* read in my part of the matrix column indices  */
1178     nz = procsnz[0];
1179     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1180     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1181 
1182     /* read in every one elses and ship off */
1183     for ( i=1; i<size; i++ ) {
1184       nz = procsnz[i];
1185       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
1186       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1187     }
1188     PetscFree(cols);
1189   }
1190   else {
1191     /* determine buffer space needed for message */
1192     nz = 0;
1193     for ( i=0; i<m; i++ ) {
1194       nz += ourlens[i];
1195     }
1196     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1197 
1198     /* receive message of column indices*/
1199     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1200     MPI_Get_count(&status,MPI_INT,&maxnz);
1201     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
1202   }
1203 
1204   /* loop over local rows, determining number of off-diagonal entries */
1205   PetscMemzero(offlens,m*sizeof(int));
1206   jj = 0;
1207   for ( i=0; i<m; i++ ) {
1208     for ( j=0; j<ourlens[i]; j++ ) {
1209       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1210       jj++;
1211     }
1212   }
1213 
1214   /* create our matrix */
1215   for ( i=0; i<m; i++ ) {
1216     ourlens[i] -= offlens[i];
1217   }
1218   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1219   A = *newmat;
1220   for ( i=0; i<m; i++ ) {
1221     ourlens[i] += offlens[i];
1222   }
1223 
1224   if (!rank) {
1225     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1226 
1227     /* read in my part of the matrix numerical values  */
1228     nz = procsnz[0];
1229     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1230 
1231     /* insert into matrix */
1232     jj      = rstart;
1233     smycols = mycols;
1234     svals   = vals;
1235     for ( i=0; i<m; i++ ) {
1236       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1237       smycols += ourlens[i];
1238       svals   += ourlens[i];
1239       jj++;
1240     }
1241 
1242     /* read in the other processors' parts and ship them out */
1243     for ( i=1; i<size; i++ ) {
1244       nz = procsnz[i];
1245       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1246       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1247     }
1248     PetscFree(procsnz);
1249   }
1250   else {
1251     /* receive numeric values */
1252     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1253 
1254     /* receive message of values*/
1255     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1256     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1257     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
1258 
1259     /* insert into matrix */
1260     jj      = rstart;
1261     smycols = mycols;
1262     svals   = vals;
1263     for ( i=0; i<m; i++ ) {
1264       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1265       smycols += ourlens[i];
1266       svals   += ourlens[i];
1267       jj++;
1268     }
1269   }
1270   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1271 
1272   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1273   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1274   return 0;
1275 }
1276 
1277 
1278 
1279 
1280 
1281