xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision cd2245dea71a3aa4fce336c3d5cb6692bd00b1f7)
1 #ifndef lint
2 static char vcid[] = "$Id: mpidense.c,v 1.65 1997/03/09 17:40:01 curfman Exp curfman $";
3 #endif
4 
5 /*
6    Basic functions for parallel dense matrices.
7 */
8 
9 #include "src/mat/impls/dense/mpi/mpidense.h"
10 #include "src/vec/vecimpl.h"
11 
12 #undef __FUNC__
13 #define __FUNC__ "MatSetValues_MPIDense"
14 static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
15 {
16   Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
17   int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
18   int          roworiented = A->roworiented;
19 
20   for ( i=0; i<m; i++ ) {
21     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
22     if (idxm[i] >= A->M) SETERRQ(1,0,"Row too large");
23     if (idxm[i] >= rstart && idxm[i] < rend) {
24       row = idxm[i] - rstart;
25       if (roworiented) {
26         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr);
27       }
28       else {
29         for ( j=0; j<n; j++ ) {
30           if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
31           if (idxn[j] >= A->N) SETERRQ(1,0,"Column too large");
32           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
33         }
34       }
35     }
36     else {
37       if (roworiented) {
38         ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr);
39       }
40       else { /* must stash each separately */
41         row = idxm[i];
42         for ( j=0; j<n; j++ ) {
43           ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
44         }
45       }
46     }
47   }
48   return 0;
49 }
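
/*
   A note on the layout of the values array v above (an illustrative sketch;
   a00,a01,a10,a11 below are placeholder values): with MAT_ROW_ORIENTED (the
   default set in MatCreateMPIDense()) the value for entry (idxm[i],idxn[j])
   is read from v[i*n+j]; with MAT_COLUMN_ORIENTED it is read from v[i+j*m].
   For example, inserting a 2 by 2 block row by row:

       int    rows[2] = {0,1}, cols[2] = {0,1};
       Scalar vals[4] = {a00,a01,a10,a11};
       ierr = MatSetValues(mat,2,rows,2,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
*/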
50 
51 #undef __FUNC__
52 #define __FUNC__ "MatGetValues_MPIDense"
53 static int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
54 {
55   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
56   int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;
57 
58   for ( i=0; i<m; i++ ) {
59     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
60     if (idxm[i] >= mdn->M) SETERRQ(1,0,"Row too large");
61     if (idxm[i] >= rstart && idxm[i] < rend) {
62       row = idxm[i] - rstart;
63       for ( j=0; j<n; j++ ) {
64         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
65         if (idxn[j] >= mdn->N)
66           SETERRQ(1,0,"Column too large");
67         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr);
68       }
69     }
70     else {
71       SETERRQ(1,0,"Only local values currently supported");
72     }
73   }
74   return 0;
75 }
76 
77 #undef __FUNC__
78 #define __FUNC__ "MatGetArray_MPIDense"
79 static int MatGetArray_MPIDense(Mat A,Scalar **array)
80 {
81   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
82   int ierr;
83 
84   ierr = MatGetArray(a->A,array); CHKERRQ(ierr);
85   return 0;
86 }
87 
88 #undef __FUNC__
89 #define __FUNC__ "MatRestoreArray_MPIDense"
90 static int MatRestoreArray_MPIDense(Mat A,Scalar **array)
91 {
92   return 0;
93 }
94 
95 #undef __FUNC__
96 #define __FUNC__ "MatAssemblyBegin_MPIDense"
97 static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
98 {
99   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
100   MPI_Comm     comm = mat->comm;
101   int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
102   int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
103   int          tag = mat->tag, *owner,*starts,count,ierr;
104   InsertMode   addv;
105   MPI_Request  *send_waits,*recv_waits;
106   Scalar       *rvalues,*svalues;
107 
108   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode (the OR-reduce below detects a mix) */
109   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
110   if (addv == (ADD_VALUES|INSERT_VALUES)) {
111     SETERRQ(1,0,"Cannot mix adds/inserts on different procs");
112   }
113   mat->insertmode = addv; /* in case this processor had no cache */
114 
115   /*  first count number of contributors to each processor */
116   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
117   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
118   owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
119   for ( i=0; i<mdn->stash.n; i++ ) {
120     idx = mdn->stash.idx[i];
121     for ( j=0; j<size; j++ ) {
122       if (idx >= owners[j] && idx < owners[j+1]) {
123         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
124       }
125     }
126   }
127   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
128 
129   /* inform the other processors of the number of messages and the max length */
130   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
131   MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);
132   nreceives = work[rank];
133   if (nreceives > size) SETERRQ(1,0,"Internal PETSc error");
134   MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);
135   nmax = work[rank];
136   PetscFree(work);
137 
138   /* post receives:
139        1) each message will consist of ordered triples
140      (global row index, global column index, value); we store the indices
141      as Scalars to simplify the message passing.
142        2) since we don't know how long each individual message is we
143      allocate the largest needed buffer for each receive. Potentially
144      this is a lot of wasted space.
145 
146        This could be done better.
147   */
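  /*
     Layout of the packed messages produced by the packing loop below: each
     stashed entry destined for processor p occupies three consecutive Scalars
     in svalues, at the running offset s = starts[p],

         svalues[3*s]   = (Scalar) global row index     (stash.idx[i])
         svalues[3*s+1] = (Scalar) global column index   (stash.idy[i])
         svalues[3*s+2] = the value itself               (stash.array[i])

     MatAssemblyEnd_MPIDense() converts the first two entries of each triple
     back to ints when unpacking.
  */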
148   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
149   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
150   for ( i=0; i<nreceives; i++ ) {
151     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
152   }
153 
154   /* do sends:
155       1) starts[i] gives the starting index in svalues for stuff going to
156          the ith processor
157   */
158   svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
159   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
160   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
161   starts[0] = 0;
162   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
163   for ( i=0; i<mdn->stash.n; i++ ) {
164     svalues[3*starts[owner[i]]]       = (Scalar)  mdn->stash.idx[i];
165     svalues[3*starts[owner[i]]+1]     = (Scalar)  mdn->stash.idy[i];
166     svalues[3*(starts[owner[i]]++)+2] =  mdn->stash.array[i];
167   }
168   PetscFree(owner);
169   starts[0] = 0;
170   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
171   count = 0;
172   for ( i=0; i<size; i++ ) {
173     if (procs[i]) {
174       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);
175     }
176   }
177   PetscFree(starts); PetscFree(nprocs);
178 
179   /* Free cache space */
180   PLogInfo(mat,"MatAssemblyBegin_MPIDense:Number of off-processor values %d\n",mdn->stash.n);
181   ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);
182 
183   mdn->svalues    = svalues;    mdn->rvalues = rvalues;
184   mdn->nsends     = nsends;     mdn->nrecvs = nreceives;
185   mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
186   mdn->rmax       = nmax;
187 
188   return 0;
189 }
190 extern int MatSetUpMultiply_MPIDense(Mat);
191 
192 #undef __FUNC__
193 #define __FUNC__ "MatAssemblyEnd_MPIDense"
194 static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
195 {
196   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
197   MPI_Status   *send_status,recv_status;
198   int          imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col;
199   Scalar       *values,val;
200   InsertMode   addv = mat->insertmode;
201 
202   /*  wait on receives */
203   while (count) {
204     MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);
205     /* unpack receives into our local space */
206     values = mdn->rvalues + 3*imdex*mdn->rmax;
207     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
208     n = n/3;
209     for ( i=0; i<n; i++ ) {
210       row = (int) PetscReal(values[3*i]) - mdn->rstart;
211       col = (int) PetscReal(values[3*i+1]);
212       val = values[3*i+2];
213       if (col >= 0 && col < mdn->N) {
214         MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
215       }
216       else {SETERRQ(1,0,"Invalid column");}
217     }
218     count--;
219   }
220   PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues);
221 
222   /* wait on sends */
223   if (mdn->nsends) {
224     send_status = (MPI_Status *) PetscMalloc(mdn->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
225     MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);
226     PetscFree(send_status);
227   }
228   PetscFree(mdn->send_waits); PetscFree(mdn->svalues);
229 
230   ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
231   ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);
232 
233   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
234     ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
235   }
236   return 0;
237 }
238 
239 #undef __FUNC__
240 #define __FUNC__ "MatZeroEntries_MPIDense"
241 static int MatZeroEntries_MPIDense(Mat A)
242 {
243   Mat_MPIDense *l = (Mat_MPIDense *) A->data;
244   return MatZeroEntries(l->A);
245 }
246 
247 #undef __FUNC__
248 #define __FUNC__ "MatGetBlockSize_MPIDense"
249 static int MatGetBlockSize_MPIDense(Mat A,int *bs)
250 {
251   *bs = 1;
252   return 0;
253 }
254 
255 /* the code does not do the diagonal entries correctly unless the
256    matrix is square and the column and row ownerships are identical.
257    This is a BUG. The only way to fix it seems to be to access
258    mdn->A and mdn->B directly and not through the MatZeroRows()
259    routine.
260 */
261 #undef __FUNC__
262 #define __FUNC__ "MatZeroRows_MPIDense"
263 static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
264 {
265   Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
266   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
267   int            *procs,*nprocs,j,found,idx,nsends,*work;
268   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
269   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
270   int            *lens,imdex,*lrows,*values;
271   MPI_Comm       comm = A->comm;
272   MPI_Request    *send_waits,*recv_waits;
273   MPI_Status     recv_status,*send_status;
274   IS             istmp;
275 
276   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
277   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
278 
279   /*  first count number of contributors to each processor */
280   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
281   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
282   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
283   for ( i=0; i<N; i++ ) {
284     idx = rows[i];
285     found = 0;
286     for ( j=0; j<size; j++ ) {
287       if (idx >= owners[j] && idx < owners[j+1]) {
288         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
289       }
290     }
291     if (!found) SETERRQ(1,0,"Index out of range");
292   }
293   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
294 
295   /* inform the other processors of the number of messages and the max length */
296   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
297   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
298   nrecvs = work[rank];
299   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
300   nmax = work[rank];
301   PetscFree(work);
302 
303   /* post receives:   */
304   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
305   CHKPTRQ(rvalues);
306   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
307   CHKPTRQ(recv_waits);
308   for ( i=0; i<nrecvs; i++ ) {
309     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
310   }
311 
312   /* do sends:
313       1) starts[i] gives the starting index in svalues for stuff going to
314          the ith processor
315   */
316   svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
317   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
318   starts     = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
319   starts[0]  = 0;
320   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
321   for ( i=0; i<N; i++ ) {
322     svalues[starts[owner[i]]++] = rows[i];
323   }
324   ISRestoreIndices(is,&rows);
325 
326   starts[0] = 0;
327   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
328   count = 0;
329   for ( i=0; i<size; i++ ) {
330     if (procs[i]) {
331       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
332     }
333   }
334   PetscFree(starts);
335 
336   base = owners[rank];
337 
338   /*  wait on receives */
339   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
340   source = lens + nrecvs;
341   count  = nrecvs; slen = 0;
342   while (count) {
343     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
344     /* unpack receives into our local space */
345     MPI_Get_count(&recv_status,MPI_INT,&n);
346     source[imdex]  = recv_status.MPI_SOURCE;
347     lens[imdex]  = n;
348     slen += n;
349     count--;
350   }
351   PetscFree(recv_waits);
352 
353   /* move the received row indices into a local list */
354   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
355   count = 0;
356   for ( i=0; i<nrecvs; i++ ) {
357     values = rvalues + i*nmax;
358     for ( j=0; j<lens[i]; j++ ) {
359       lrows[count++] = values[j] - base;
360     }
361   }
362   PetscFree(rvalues); PetscFree(lens);
363   PetscFree(owner); PetscFree(nprocs);
364 
365   /* actually zap the local rows */
366   ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
367   PLogObjectParent(A,istmp);
368   PetscFree(lrows);
369   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
370   ierr = ISDestroy(istmp); CHKERRQ(ierr);
371 
372   /* wait on sends */
373   if (nsends) {
374     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
375     MPI_Waitall(nsends,send_waits,send_status);
376     PetscFree(send_status);
377   }
378   PetscFree(send_waits); PetscFree(svalues);
379 
380   return 0;
381 }
382 
383 #undef __FUNC__
384 #define __FUNC__ "MatMult_MPIDense"
385 static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
386 {
387   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
388   int          ierr;
389 
390   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
391   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
392   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
393   return 0;
394 }
395 
396 #undef __FUNC__
397 #define __FUNC__ "MatMultAdd_MPIDense"
398 static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
399 {
400   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
401   int          ierr;
402 
403   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
404   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
405   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
406   return 0;
407 }
408 
409 #undef __FUNC__
410 #define __FUNC__ "MatMultTrans_MPIDense"
411 static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
412 {
413   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
414   int          ierr;
415   Scalar       zero = 0.0;
416 
417   ierr = VecSet(&zero,yy); CHKERRQ(ierr);
418   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
419   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
420   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
421   return 0;
422 }
423 
424 #undef __FUNC__
425 #define __FUNC__ "MatMultTransAdd_MPIDense"
426 static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
427 {
428   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
429   int          ierr;
430 
431   ierr = VecCopy(yy,zz); CHKERRQ(ierr);
432   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
433   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
434   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
435   return 0;
436 }
437 
438 #undef __FUNC__
439 #define __FUNC__ "MatGetDiagonal_MPIDense"
440 static int MatGetDiagonal_MPIDense(Mat A,Vec v)
441 {
442   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
443   Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
444   int          ierr, len, i, n, m = a->m, radd;
445   Scalar       *x, zero = 0.0;
446 
447   VecSet(&zero,v);
448   ierr = VecGetArray(v,&x); CHKERRQ(ierr);
449   ierr = VecGetSize(v,&n); CHKERRQ(ierr);
450   if (n != a->M) SETERRQ(1,0,"Nonconforming mat and vec");
451   len = PetscMin(aloc->m,aloc->n);
452   radd = a->rstart*m;
453   for ( i=0; i<len; i++ ) {
454     x[i] = aloc->v[radd + i*m + i];
455   }
456   return 0;
457 }
458 
459 #undef __FUNC__
460 #define __FUNC__ "MatDestroy_MPIDense"
461 static int MatDestroy_MPIDense(PetscObject obj)
462 {
463   Mat          mat = (Mat) obj;
464   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
465   int          ierr;
466 
467 #if defined(PETSC_LOG)
468   PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N);
469 #endif
470   PetscFree(mdn->rowners);
471   ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
472   if (mdn->lvec)   VecDestroy(mdn->lvec);
473   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
474   if (mdn->factor) {
475     if (mdn->factor->temp)   PetscFree(mdn->factor->temp);
476     if (mdn->factor->tag)    PetscFree(mdn->factor->tag);
477     if (mdn->factor->pivots) PetscFree(mdn->factor->pivots);
478     PetscFree(mdn->factor);
479   }
480   PetscFree(mdn);
481   if (mat->mapping) {
482     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
483   }
484   PLogObjectDestroy(mat);
485   PetscHeaderDestroy(mat);
486   return 0;
487 }
488 
489 #include "pinclude/pviewer.h"
490 
491 #undef __FUNC__
492 #define __FUNC__ "MatView_MPIDense_Binary"
493 static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
494 {
495   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
496   int          ierr;
497 
498   if (mdn->size == 1) {
499     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
500   }
501   else SETERRQ(1,0,"Only uniprocessor output supported");
502   return 0;
503 }
504 
505 #undef __FUNC__
506 #define __FUNC__ "MatView_MPIDense_ASCII"
507 static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
508 {
509   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
510   int          ierr, format;
511   FILE         *fd;
512   ViewerType   vtype;
513 
514   ViewerGetType(viewer,&vtype);
515   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
516   ierr = ViewerGetFormat(viewer,&format);
517   if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
518     int rank;
519     MatInfo info;
520     MPI_Comm_rank(mat->comm,&rank);
521     ierr = MatGetInfo(mat,MAT_LOCAL,&info);
522     PetscSequentialPhaseBegin(mat->comm,1);
523       fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
524          (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
525       fflush(fd);
526     PetscSequentialPhaseEnd(mat->comm,1);
527     ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
528     return 0;
529   }
530   else if (format == VIEWER_FORMAT_ASCII_INFO) {
531     return 0;
532   }
533   if (vtype == ASCII_FILE_VIEWER) {
534     PetscSequentialPhaseBegin(mat->comm,1);
535     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
536              mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
537     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
538     fflush(fd);
539     PetscSequentialPhaseEnd(mat->comm,1);
540   }
541   else {
542     int size = mdn->size, rank = mdn->rank;
543     if (size == 1) {
544       ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
545     }
546     else {
547       /* assemble the entire matrix onto the first processor. */
548       Mat          A;
549       int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
550       Scalar       *vals;
551       Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;
552 
553       if (!rank) {
554         ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
555       }
556       else {
557         ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
558       }
559       PLogObjectParent(mat,A);
560 
561       /* Copy the matrix ... This isn't the most efficient means,
562          but it's quick for now */
563       row = mdn->rstart; m = Amdn->m;
564       for ( i=0; i<m; i++ ) {
565         ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
566         ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
567         ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
568         row++;
569       }
570 
571       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
572       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
573       if (!rank) {
574         ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
575       }
576       ierr = MatDestroy(A); CHKERRQ(ierr);
577     }
578   }
579   return 0;
580 }
581 
582 #undef __FUNC__
583 #define __FUNC__ "MatView_MPIDense"
584 static int MatView_MPIDense(PetscObject obj,Viewer viewer)
585 {
586   Mat          mat = (Mat) obj;
587   int          ierr;
588   ViewerType   vtype;
589 
590   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
591   if (vtype == ASCII_FILE_VIEWER) {
592     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
593   }
594   else if (vtype == ASCII_FILES_VIEWER) {
595     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
596   }
597   else if (vtype == BINARY_FILE_VIEWER) {
598     return MatView_MPIDense_Binary(mat,viewer);
599   }
600   return 0;
601 }
602 
603 #undef __FUNC__
604 #define __FUNC__ "MatGetInfo_MPIDense"
605 static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
606 {
607   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
608   Mat          mdn = mat->A;
609   int          ierr;
610   double       isend[5], irecv[5];
611 
612   info->rows_global    = (double)mat->M;
613   info->columns_global = (double)mat->N;
614   info->rows_local     = (double)mat->m;
615   info->columns_local  = (double)mat->N;
616   info->block_size     = 1.0;
617   ierr = MatGetInfo(mdn,MAT_LOCAL,info); CHKERRQ(ierr);
618   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
619   isend[3] = info->memory;  isend[4] = info->mallocs;
620   if (flag == MAT_LOCAL) {
621     info->nz_used      = isend[0];
622     info->nz_allocated = isend[1];
623     info->nz_unneeded  = isend[2];
624     info->memory       = isend[3];
625     info->mallocs      = isend[4];
626   } else if (flag == MAT_GLOBAL_MAX) {
627     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);
628     info->nz_used      = irecv[0];
629     info->nz_allocated = irecv[1];
630     info->nz_unneeded  = irecv[2];
631     info->memory       = irecv[3];
632     info->mallocs      = irecv[4];
633   } else if (flag == MAT_GLOBAL_SUM) {
634     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);
635     info->nz_used      = irecv[0];
636     info->nz_allocated = irecv[1];
637     info->nz_unneeded  = irecv[2];
638     info->memory       = irecv[3];
639     info->mallocs      = irecv[4];
640   }
641   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
642   info->fill_ratio_needed = 0;
643   info->factor_mallocs    = 0;
644   return 0;
645 }
646 
647 /* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
648    extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
649    extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
650    extern int MatSolve_MPIDense(Mat,Vec,Vec);
651    extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
652    extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
653    extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */
654 
655 #undef __FUNC__
656 #define __FUNC__ "MatSetOption_MPIDense"
657 static int MatSetOption_MPIDense(Mat A,MatOption op)
658 {
659   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
660 
661   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
662       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
663       op == MAT_COLUMNS_SORTED ||
664       op == MAT_COLUMNS_UNSORTED) {
665         MatSetOption(a->A,op);
666   } else if (op == MAT_ROW_ORIENTED) {
667         a->roworiented = 1;
668         MatSetOption(a->A,op);
669   } else if (op == MAT_ROWS_SORTED ||
670              op == MAT_ROWS_UNSORTED ||
671              op == MAT_SYMMETRIC ||
672              op == MAT_STRUCTURALLY_SYMMETRIC ||
673              op == MAT_YES_NEW_DIAGONALS)
674     PLogInfo(A,"Info:MatSetOption_MPIDense:Option ignored\n");
675   else if (op == MAT_COLUMN_ORIENTED)
676     {a->roworiented = 0; MatSetOption(a->A,op);}
677   else if (op == MAT_NO_NEW_DIAGONALS)
678     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
679   else
680     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
681   return 0;
682 }
683 
684 #undef __FUNC__
685 #define __FUNC__ "MatGetSize_MPIDense"
686 static int MatGetSize_MPIDense(Mat A,int *m,int *n)
687 {
688   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
689   *m = mat->M; *n = mat->N;
690   return 0;
691 }
692 
693 #undef __FUNC__
694 #define __FUNC__ "MatGetLocalSize_MPIDense"
695 static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
696 {
697   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
698   *m = mat->m; *n = mat->N;
699   return 0;
700 }
701 
702 #undef __FUNC__
703 #define __FUNC__ "MatGetOwnershipRange_MPIDense"
704 static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
705 {
706   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
707   *m = mat->rstart; *n = mat->rend;
708   return 0;
709 }
710 
711 #undef __FUNC__
712 #define __FUNC__ "MatGetRow_MPIDense"
713 static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
714 {
715   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
716   int          lrow, rstart = mat->rstart, rend = mat->rend;
717 
718   if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows currently supported");
719   lrow = row - rstart;
720   return MatGetRow(mat->A,lrow,nz,idx,v);
721 }
722 
723 #undef __FUNC__
724 #define __FUNC__ "MatRestoreRow_MPIDense"
725 static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
726 {
727   if (idx) PetscFree(*idx);
728   if (v) PetscFree(*v);
729   return 0;
730 }
731 
732 #undef __FUNC__
733 #define __FUNC__ "MatNorm_MPIDense"
734 static int MatNorm_MPIDense(Mat A,NormType type,double *norm)
735 {
736   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
737   Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
738   int          ierr, i, j;
739   double       sum = 0.0;
740   Scalar       *v = mat->v;
741 
742   if (mdn->size == 1) {
743     ierr =  MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
744   } else {
745     if (type == NORM_FROBENIUS) {
746       for (i=0; i<mat->n*mat->m; i++ ) {
747 #if defined(PETSC_COMPLEX)
748         sum += real(conj(*v)*(*v)); v++;
749 #else
750         sum += (*v)*(*v); v++;
751 #endif
752       }
753       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);
754       *norm = sqrt(*norm);
755       PLogFlops(2*mat->n*mat->m);
756     }
757     else if (type == NORM_1) {
758       double *tmp, *tmp2;
759       tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
760       tmp2 = tmp + mdn->N;
761       PetscMemzero(tmp,2*mdn->N*sizeof(double));
762       *norm = 0.0;
763       v = mat->v;
764       for ( j=0; j<mat->n; j++ ) {
765         for ( i=0; i<mat->m; i++ ) {
766           tmp[j] += PetscAbsScalar(*v);  v++;
767         }
768       }
769       MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);
770       for ( j=0; j<mdn->N; j++ ) {
771         if (tmp2[j] > *norm) *norm = tmp2[j];
772       }
773       PetscFree(tmp);
774       PLogFlops(mat->n*mat->m);
775     }
776     else if (type == NORM_INFINITY) { /* max row norm */
777       double ntemp;
778       ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
779       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);
780     }
781     else {
782       SETERRQ(1,0,"No support for two norm");
783     }
784   }
785   return 0;
786 }
787 
788 #undef __FUNC__
789 #define __FUNC__ "MatTranspose_MPIDense"
790 static int MatTranspose_MPIDense(Mat A,Mat *matout)
791 {
792   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
793   Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
794   Mat          B;
795   int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
796   int          j, i, ierr;
797   Scalar       *v;
798 
799   if (matout == PETSC_NULL && M != N) {
800     SETERRQ(1,0,"Supports square matrix only in-place");
801   }
802   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);
803 
804   m = Aloc->m; n = Aloc->n; v = Aloc->v;
805   rwork = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(rwork);
806   for ( j=0; j<n; j++ ) {
807     for (i=0; i<m; i++) rwork[i] = rstart + i;
808     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
809     v   += m;
810   }
811   PetscFree(rwork);
812   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
813   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
814   if (matout != PETSC_NULL) {
815     *matout = B;
816   } else {
817     /* This isn't really an in-place transpose: build B, free the data structures of a, then overwrite A with B */
818     PetscFree(a->rowners);
819     ierr = MatDestroy(a->A); CHKERRQ(ierr);
820     if (a->lvec) VecDestroy(a->lvec);
821     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
822     PetscFree(a);
823     PetscMemcpy(A,B,sizeof(struct _Mat));
824     PetscHeaderDestroy(B);
825   }
826   return 0;
827 }
828 
829 #include "pinclude/plapack.h"
830 #undef __FUNC__
831 #define __FUNC__ "MatScale_MPIDense"
832 static int MatScale_MPIDense(Scalar *alpha,Mat inA)
833 {
834   Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
835   Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
836   int          one = 1, nz;
837 
838   nz = a->m*a->n;
839   BLscal_( &nz, alpha, a->v, &one );
840   PLogFlops(nz);
841   return 0;
842 }
843 
844 static int MatConvertSameType_MPIDense(Mat,Mat *,int);
845 
846 /* -------------------------------------------------------------------*/
847 static struct _MatOps MatOps = {MatSetValues_MPIDense,
848        MatGetRow_MPIDense,MatRestoreRow_MPIDense,
849        MatMult_MPIDense,MatMultAdd_MPIDense,
850        MatMultTrans_MPIDense,MatMultTransAdd_MPIDense,
851 /*       MatSolve_MPIDense,0, */
852        0,0,
853        0,0,
854        0,0,
855 /*       MatLUFactor_MPIDense,0, */
856        0,MatTranspose_MPIDense,
857        MatGetInfo_MPIDense,0,
858        MatGetDiagonal_MPIDense,0,MatNorm_MPIDense,
859        MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
860        0,
861        MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
862        0,0,
863 /*       0,MatLUFactorSymbolic_MPIDense,MatLUFactorNumeric_MPIDense, */
864        0,0,
865        MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
866        MatGetOwnershipRange_MPIDense,
867        0,0, MatGetArray_MPIDense, MatRestoreArray_MPIDense,
868        MatConvertSameType_MPIDense,
869        0,0,0,0,0,
870        0,0,MatGetValues_MPIDense,0,0,MatScale_MPIDense,
871        0,0,0,MatGetBlockSize_MPIDense};
872 
873 #undef __FUNC__
874 #define __FUNC__ "MatCreateMPIDense"
875 /*@C
876    MatCreateMPIDense - Creates a parallel matrix in dense format.
877 
878    Input Parameters:
879 .  comm - MPI communicator
880 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
881 .  n - number of local columns (or PETSC_DECIDE to have it calculated
882            if N is given)
883 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
884 .  N - number of global columns (or PETSC_DECIDE to have it calculated
885            if n is given)
886 .  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
887    to control all matrix memory allocation.
888 
889    Output Parameter:
890 .  A - the matrix
891 
892    Notes:
893    The dense format is fully compatible with standard Fortran 77
894    storage by columns.
895 
896    The data input variable is intended primarily for Fortran programmers
897    who wish to allocate their own matrix memory space.  Most users should
898    set data=PETSC_NULL.
899 
900    The user MUST specify either the local or global matrix dimensions
901    (possibly both).
902 
903    Currently, the only parallel dense matrix decomposition is by rows,
904    so that n=N and each submatrix owns all of the global columns.
905 
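   Example (a minimal sketch; the dimensions below are arbitrary):
$      Mat A;
$      int ierr;
$      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
$                               100,100,PETSC_NULL,&A); CHKERRQ(ierr);
$      ... set entries with MatSetValues(), then ...
$      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
$      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
$      ierr = MatDestroy(A); CHKERRQ(ierr);
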
906 .keywords: matrix, dense, parallel
907 
908 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
909 @*/
910 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
911 {
912   Mat          mat;
913   Mat_MPIDense *a;
914   int          ierr, i,flg;
915 
916   /* Note:  For now, when data is specified above, this assumes the user correctly
917    allocates the local dense storage space.  We should add error checking. */
918 
919   *A = 0;
920   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm);
921   PLogObjectCreate(mat);
922   mat->data       = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
923   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
924   mat->destroy    = MatDestroy_MPIDense;
925   mat->view       = MatView_MPIDense;
926   mat->factor     = 0;
927   mat->mapping    = 0;
928 
929   a->factor       = 0;
930   mat->insertmode = NOT_SET_VALUES;
931   MPI_Comm_rank(comm,&a->rank);
932   MPI_Comm_size(comm,&a->size);
933 
934   if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);
935   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
936 
937   /* each row stores all columns */
938   if (N == PETSC_DECIDE) N = n;
939   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
940   /*  if (n != N) SETERRQ(1,0,"For now, only n=N is supported"); */
941   a->N = mat->N = N;
942   a->M = mat->M = M;
943   a->m = mat->m = m;
944   a->n = mat->n = n;
945 
946   /* build local table of row and column ownerships */
947   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
948   a->cowners = a->rowners + a->size + 1;
949   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
950   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
951   a->rowners[0] = 0;
952   for ( i=2; i<=a->size; i++ ) {
953     a->rowners[i] += a->rowners[i-1];
954   }
955   a->rstart = a->rowners[a->rank];
956   a->rend   = a->rowners[a->rank+1];
957   MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
958   a->cowners[0] = 0;
959   for ( i=2; i<=a->size; i++ ) {
960     a->cowners[i] += a->cowners[i-1];
961   }
962 
963   ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr);
964   PLogObjectParent(mat,a->A);
965 
966   /* build cache (stash) for off-processor entries */
967   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
968 
969   /* stuff used for matrix vector multiply */
970   a->lvec        = 0;
971   a->Mvctx       = 0;
972   a->roworiented = 1;
973 
974   *A = mat;
975   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
976   if (flg) {
977     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
978   }
979   return 0;
980 }
981 
982 #undef __FUNC__
983 #define __FUNC__ "MatConvertSameType_MPIDense"
984 static int MatConvertSameType_MPIDense(Mat A,Mat *newmat,int cpvalues)
985 {
986   Mat          mat;
987   Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
988   int          ierr;
989   FactorCtx    *factor;
990 
991   *newmat       = 0;
992   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm);
993   PLogObjectCreate(mat);
994   mat->data      = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
995   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
996   mat->destroy   = MatDestroy_MPIDense;
997   mat->view      = MatView_MPIDense;
998   mat->factor    = A->factor;
999   mat->assembled = PETSC_TRUE;
1000 
1001   a->m = mat->m = oldmat->m;
1002   a->n = mat->n = oldmat->n;
1003   a->M = mat->M = oldmat->M;
1004   a->N = mat->N = oldmat->N;
1005   if (oldmat->factor) {
1006     a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor);
1007     /* copy factor contents ... add this code! */
1008   } else a->factor = 0;
1009 
1010   a->rstart       = oldmat->rstart;
1011   a->rend         = oldmat->rend;
1012   a->size         = oldmat->size;
1013   a->rank         = oldmat->rank;
1014   mat->insertmode = NOT_SET_VALUES;
1015 
1016   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
1017   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
1018   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1019   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1020 
1021   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1022   PLogObjectParent(mat,a->lvec);
1023   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1024   PLogObjectParent(mat,a->Mvctx);
1025   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1026   PLogObjectParent(mat,a->A);
1027   *newmat = mat;
1028   return 0;
1029 }
1030 
1031 #include "sys.h"
1032 
1033 #undef __FUNC__
1034 #define __FUNC__ "MatLoad_MPIDense_DenseInFile"
1035 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
1036 {
1037   int        *rowners, i,size,rank,m,rstart,rend,ierr,nz,j;
1038   Scalar     *array,*vals,*vals_ptr;
1039   MPI_Status status;
1040 
1041   MPI_Comm_rank(comm,&rank);
1042   MPI_Comm_size(comm,&size);
1043 
1044   /* determine ownership of all rows */
1045   m = M/size + ((M % size) > rank);
1046   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1047   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1048   rowners[0] = 0;
1049   for ( i=2; i<=size; i++ ) {
1050     rowners[i] += rowners[i-1];
1051   }
1052   rstart = rowners[rank];
1053   rend   = rowners[rank+1];
1054 
1055   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1056   ierr = MatGetArray(*newmat,&array); CHKERRQ(ierr);
1057 
1058   if (!rank) {
1059     vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);
1060 
1061     /* read in my part of the matrix numerical values  */
1062     ierr = PetscBinaryRead(fd,vals,m*N,BINARY_SCALAR); CHKERRQ(ierr);
1063 
1064     /* insert into the matrix row by row (the local dense array is stored by columns, so we cannot read the row-ordered file data into it directly) */
1065     vals_ptr = vals;
1066     for ( i=0; i<m; i++ ) {
1067       for ( j=0; j<N; j++ ) {
1068         array[i + j*m] = *vals_ptr++;
1069       }
1070     }
1071 
1072     /* read in the other processors' parts and ship them out */
1073     for ( i=1; i<size; i++ ) {
1074       nz   = (rowners[i+1] - rowners[i])*N;
1075       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1076       MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);
1077     }
1078   }
1079   else {
1080     /* receive numeric values */
1081     vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);
1082 
1083     /* receive message of values*/
1084     MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);
1085 
1086     /* insert into the matrix row by row (the local dense array is stored by columns, so we cannot read the row-ordered file data into it directly) */
1087     vals_ptr = vals;
1088     for ( i=0; i<m; i++ ) {
1089       for ( j=0; j<N; j++ ) {
1090         array[i + j*m] = *vals_ptr++;
1091       }
1092     }
1093   }
1094   PetscFree(rowners);
1095   PetscFree(vals);
1096   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1097   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1098   return 0;
1099 }
1100 
1101 
1102 #undef __FUNC__
1103 #define __FUNC__ "MatLoad_MPIDense"
1104 int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
1105 {
1106   Mat          A;
1107   int          i, nz, ierr, j,rstart, rend, fd;
1108   Scalar       *vals,*svals;
1109   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1110   MPI_Status   status;
1111   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1112   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1113   int          tag = ((PetscObject)viewer)->tag;
1114 
1115   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1116   if (!rank) {
1117     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1118     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1119     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not a matrix object");
1120   }
1121 
1122   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1123   M = header[1]; N = header[2]; nz = header[3];
1124 
1125   /*
1126        Handle case where matrix is stored on disk as a dense matrix
1127   */
1128   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1129     return MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);
1130   }
1131 
1132   /* determine ownership of all rows */
1133   m = M/size + ((M % size) > rank);
1134   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1135   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1136   rowners[0] = 0;
1137   for ( i=2; i<=size; i++ ) {
1138     rowners[i] += rowners[i-1];
1139   }
1140   rstart = rowners[rank];
1141   rend   = rowners[rank+1];
1142 
1143   /* distribute row lengths to all processors */
1144   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1145   offlens = ourlens + (rend-rstart);
1146   if (!rank) {
1147     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1148     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1149     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1150     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1151     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1152     PetscFree(sndcounts);
1153   }
1154   else {
1155     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1156   }
1157 
1158   if (!rank) {
1159     /* calculate the number of nonzeros on each processor */
1160     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1161     PetscMemzero(procsnz,size*sizeof(int));
1162     for ( i=0; i<size; i++ ) {
1163       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1164         procsnz[i] += rowlengths[j];
1165       }
1166     }
1167     PetscFree(rowlengths);
1168 
1169     /* determine max buffer needed and allocate it */
1170     maxnz = 0;
1171     for ( i=0; i<size; i++ ) {
1172       maxnz = PetscMax(maxnz,procsnz[i]);
1173     }
1174     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1175 
1176     /* read in my part of the matrix column indices  */
1177     nz = procsnz[0];
1178     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1179     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1180 
1181     /* read in everyone else's part and ship it off */
1182     for ( i=1; i<size; i++ ) {
1183       nz = procsnz[i];
1184       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
1185       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1186     }
1187     PetscFree(cols);
1188   }
1189   else {
1190     /* determine buffer space needed for message */
1191     nz = 0;
1192     for ( i=0; i<m; i++ ) {
1193       nz += ourlens[i];
1194     }
1195     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1196 
1197     /* receive message of column indices*/
1198     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1199     MPI_Get_count(&status,MPI_INT,&maxnz);
1200     if (maxnz != nz) SETERRQ(1,0,"something is wrong with the file");
1201   }
1202 
1203   /* loop over local rows, determining the number of off-diagonal entries */
1204   PetscMemzero(offlens,m*sizeof(int));
1205   jj = 0;
1206   for ( i=0; i<m; i++ ) {
1207     for ( j=0; j<ourlens[i]; j++ ) {
1208       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1209       jj++;
1210     }
1211   }
1212 
1213   /* create our matrix */
1214   for ( i=0; i<m; i++ ) {
1215     ourlens[i] -= offlens[i];
1216   }
1217   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1218   A = *newmat;
1219   for ( i=0; i<m; i++ ) {
1220     ourlens[i] += offlens[i];
1221   }
1222 
1223   if (!rank) {
1224     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1225 
1226     /* read in my part of the matrix numerical values  */
1227     nz = procsnz[0];
1228     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1229 
1230     /* insert into matrix */
1231     jj      = rstart;
1232     smycols = mycols;
1233     svals   = vals;
1234     for ( i=0; i<m; i++ ) {
1235       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1236       smycols += ourlens[i];
1237       svals   += ourlens[i];
1238       jj++;
1239     }
1240 
1241     /* read in the other processors' parts and ship them out */
1242     for ( i=1; i<size; i++ ) {
1243       nz = procsnz[i];
1244       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1245       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1246     }
1247     PetscFree(procsnz);
1248   }
1249   else {
1250     /* receive numeric values */
1251     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1252 
1253     /* receive message of values*/
1254     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1255     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1256     if (maxnz != nz) SETERRQ(1,0,"something is wrong with the file");
1257 
1258     /* insert into matrix */
1259     jj      = rstart;
1260     smycols = mycols;
1261     svals   = vals;
1262     for ( i=0; i<m; i++ ) {
1263       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1264       smycols += ourlens[i];
1265       svals   += ourlens[i];
1266       jj++;
1267     }
1268   }
1269   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1270 
1271   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1272   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1273   return 0;
1274 }
1275 