xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 63c41f6a1560bbb6cf7ee09697a660f5641fb9ab)
#ifndef lint
static char vcid[] = "$Id: mpidense.c,v 1.58 1996/12/01 17:55:13 curfman Exp balay $";
#endif

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

#undef __FUNCTION__
#define __FUNCTION__ "MatSetValues_MPIDense"
static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
  int          roworiented = A->roworiented;

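  /*
     Values destined for locally owned rows go directly into the local
     sequential dense matrix; values for off-processor rows are cached
     in the stash and communicated to their owners during
     MatAssemblyBegin/MatAssemblyEnd.
  */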
  if (A->insertmode != NOT_SET_VALUES && A->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds");
  }
  A->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row");
    if (idxm[i] >= A->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr);
      }
      else {
        for ( j=0; j<n; j++ ) {
          if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column");
          if (idxn[j] >= A->N) SETERRQ(1,"MatSetValues_MPIDense:Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
        }
      }
    }
    else {
      if (roworiented) {
        ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr);
      }
      else { /* must stash each value separately */
        row = idxm[i];
        for ( j=0; j<n; j++ ) {
          ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    }
  }
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatGetValues_MPIDense"
static int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(1,"MatGetValues_MPIDense:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative column");
        if (idxn[j] >= mdn->N)
          SETERRQ(1,"MatGetValues_MPIDense:Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr);
      }
    }
    else {
      SETERRQ(1,"MatGetValues_MPIDense:Only local values currently supported");
    }
  }
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatGetArray_MPIDense"
static int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int ierr;

  ierr = MatGetArray(a->A,array); CHKERRQ(ierr);
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatRestoreArray_MPIDense"
static int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatAssemblyBegin_MPIDense"
static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
  int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int          tag = mat->tag, *owner,*starts,count,ierr;
  InsertMode   addv;
  MPI_Request  *send_waits,*recv_waits;
  Scalar       *rvalues,*svalues;

  /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
  MPI_Allreduce(&mdn->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs");
  }
  mdn->insertmode = addv; /* in case this processor had no cache */

  /*  first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<mdn->stash.n; i++ ) {
    idx = mdn->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  if (nreceives > size) SETERRQ(1,"MatAssemblyBegin_MPIDense:Internal PETSc error");
  MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message consists of ordered triples
     (global row index, global column index, value); the indices are
     stored as Scalars to simplify the message passing.
       2) since we don't know how long each individual message is, we
     allocate the largest needed buffer for each receive. Potentially
     this wastes a lot of space.

       This could be done better.
  */
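  /*
     Buffer layout (matching the packing loop in the send phase below):
     entry k of a message occupies Scalars [3*k .. 3*k+2], so a message
     carrying up to nmax entries needs 3*nmax Scalars.
  */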
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<mdn->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar)  mdn->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar)  mdn->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] =  mdn->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(mat,"MatAssemblyBegin_MPIDense:Number of off-processor values %d\n",mdn->stash.n);
  ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);

  mdn->svalues    = svalues;    mdn->rvalues = rvalues;
  mdn->nsends     = nsends;     mdn->nrecvs = nreceives;
  mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
  mdn->rmax       = nmax;

  return 0;
}
extern int MatSetUpMultiply_MPIDense(Mat);

#undef __FUNCTION__
#define __FUNCTION__ "MatAssemblyEnd_MPIDense"
static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Status   *send_status,recv_status;
  int          imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col;
  Scalar       *values,val;
  InsertMode   addv = mdn->insertmode;

  /*  wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = mdn->rvalues + 3*imdex*mdn->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - mdn->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= 0 && col < mdn->N) {
        MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
      }
      else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");}
    }
    count--;
  }
  PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues);

  /* wait on sends */
  if (mdn->nsends) {
    send_status = (MPI_Status *) PetscMalloc(mdn->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(mdn->send_waits); PetscFree(mdn->svalues);

  mdn->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
  }
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatZeroEntries_MPIDense"
static int MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  return MatZeroEntries(l->A);
}

#undef __FUNCTION__
#define __FUNCTION__ "MatGetBlockSize_MPIDense"
static int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  *bs = 1;
  return 0;
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCTION__
#define __FUNCTION__ "MatZeroRows_MPIDense"
static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* +1 avoids zero-length malloc */
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:   */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /* +1 avoids zero-length malloc */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /*  wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatMult_MPIDense"
static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

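  /* y = A*x: gather the entire input vector into the local work vector
     lvec, then multiply by the local (m x N) dense block */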
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatMultAdd_MPIDense"
static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatMultTrans_MPIDense"
static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;
  Scalar       zero = 0.0;

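  /* y = A'*x: apply the local transpose to the local part of x, giving
     a length-N partial result in lvec, then scatter-add the partial
     results from all processes back into y */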
  ierr = VecSet(&zero,yy); CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatMultTransAdd_MPIDense"
static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  ierr = VecCopy(yy,zz); CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatGetDiagonal_MPIDense"
static int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
  int          ierr, len, i, n, m = a->m, radd;
  Scalar       *x, zero = 0.0;

  VecSet(&zero,v);
  ierr = VecGetArray(v,&x); CHKERRQ(ierr);
  ierr = VecGetSize(v,&n); CHKERRQ(ierr);
  if (n != a->M) SETERRQ(1,"MatGetDiagonal_MPIDense:Nonconforming mat and vec");
  len = PetscMin(aloc->m,aloc->n);
  radd = a->rstart*m;
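  /* the local block is stored column-major, so global diagonal entry
     (rstart+i,rstart+i) lives at v[(rstart+i)*m + i] = radd + i*m + i */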
  for ( i=0; i<len; i++ ) {
    x[i] = aloc->v[radd + i*m + i];
  }
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatDestroy_MPIDense"
static int MatDestroy_MPIDense(PetscObject obj)
{
  Mat          mat = (Mat) obj;
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  PetscFree(mdn->rowners);
  ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
  if (mdn->lvec)   VecDestroy(mdn->lvec);
  if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   PetscFree(mdn->factor->temp);
    if (mdn->factor->tag)    PetscFree(mdn->factor->tag);
    if (mdn->factor->pivots) PetscFree(mdn->factor->pivots);
    PetscFree(mdn->factor);
  }
  PetscFree(mdn);
  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}

#include "pinclude/pviewer.h"

#undef __FUNCTION__
#define __FUNCTION__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported");
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatView_MPIDense_ASCII"
static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format;
  FILE         *fd;
  ViewerType   vtype;

  ViewerGetType(viewer,&vtype);
  ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
  ierr = ViewerGetFormat(viewer,&format);
  if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
    int rank;
    MatInfo info;
    MPI_Comm_rank(mat->comm,&rank);
    ierr = MatGetInfo(mat,MAT_LOCAL,&info);
    PetscSequentialPhaseBegin(mat->comm,1);
      fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
         (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
    ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
    return 0;
  }
  else if (format == VIEWER_FORMAT_ASCII_INFO) {
    return 0;
  }
  if (vtype == ASCII_FILE_VIEWER) {
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
             mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  }
  else {
    int size = mdn->size, rank = mdn->rank;
    if (size == 1) {
      ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto the first processor */
      Mat          A;
      int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
      Scalar       *vals;
      Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

      /* the first processor owns all rows of the scratch matrix */
      if (!rank) {
        ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* Copy the matrix ... This isn't the most efficient means,
         but it's quick for now */
      row = mdn->rstart; m = Amdn->m;
      for ( i=0; i<m; i++ ) {
        ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
        ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        row++;
      }

      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatView_MPIDense"
static int MatView_MPIDense(PetscObject obj,Viewer viewer)
{
  Mat          mat = (Mat) obj;
  int          ierr;
  ViewerType   vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER) {
    ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == ASCII_FILES_VIEWER) {
    ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    return MatView_MPIDense_Binary(mat,viewer);
  }
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatGetInfo_MPIDense"
static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  Mat          mdn = mat->A;
  int          ierr;
  double       isend[5], irecv[5];

  ierr = MatGetInfo(mdn,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  /* set the sizes last so that the local MatGetInfo() call above
     cannot overwrite them */
  info->rows_global       = (double)mat->M;
  info->columns_global    = (double)mat->N;
  info->rows_local        = (double)mat->m;
  info->columns_local     = (double)mat->N;
  info->block_size        = 1.0;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  return 0;
}

/* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
   extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
   extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
   extern int MatSolve_MPIDense(Mat,Vec,Vec);
   extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
   extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
   extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */

#undef __FUNCTION__
#define __FUNCTION__ "MatSetOption_MPIDense"
static int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_COLUMNS_UNSORTED) {
    MatSetOption(a->A,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS) {
    PLogInfo(A,"Info:MatSetOption_MPIDense:Option ignored\n");
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");
  }
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatGetSize_MPIDense"
static int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatGetLocalSize_MPIDense"
static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->m; *n = mat->N;
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatGetOwnershipRange_MPIDense"
static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatGetRow_MPIDense"
static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  int          lrow, rstart = mat->rstart, rend = mat->rend;

  if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:Only local rows");
  lrow = row - rstart;
  return MatGetRow(mat->A,lrow,nz,idx,v);
}

#undef __FUNCTION__
#define __FUNCTION__ "MatRestoreRow_MPIDense"
static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  if (idx) PetscFree(*idx);
  if (v) PetscFree(*v);
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatNorm_MPIDense"
static int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr, i, j;
  double       sum = 0.0;
  Scalar       *v = mat->v;

  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mat->n*mat->m; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    }
    else if (type == NORM_1) {
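      /* the 1-norm is the maximum column sum: form per-column sums of
         the local rows, add them across processes, then take the max */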
      double *tmp, *tmp2;
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      PetscMemzero(tmp,2*mdn->N*sizeof(double));
      *norm = 0.0;
      v = mat->v;
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp);
      PLogFlops(mat->n*mat->m);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);
    }
    else {
      SETERRQ(1,"MatNorm_MPIDense:No support for the two norm");
    }
  }
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatTranspose_MPIDense"
static int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
  Mat          B;
  int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
  int          j, i, ierr;
  Scalar       *v;

  if (matout == PETSC_NULL && M != N) {
    SETERRQ(1,"MatTranspose_MPIDense:In-place transpose supported only for square matrices");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

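  /* the local block is column-major, so v walks down local column j;
     those m values form part of row j of the transpose, at global
     column indices rstart .. rstart+m-1 */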
  m = Aloc->m; n = Aloc->n; v = Aloc->v;
  rwork = (int *) PetscMalloc(m*sizeof(int)); CHKPTRQ(rwork);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for ( j=0; j<n; j++ ) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
    v   += m;
  }
  PetscFree(rwork);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose, but free the data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

#include "pinclude/plapack.h"
#undef __FUNCTION__
#define __FUNCTION__ "MatScale_MPIDense"
static int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
  Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
  int          one = 1, nz;

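  /* the local block is one contiguous m*n array, so a single BLAS scal
     call scales this process's entire share of the matrix */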
  nz = a->m*a->n;
  BLscal_( &nz, alpha, a->v, &one );
  PLogFlops(nz);
  return 0;
}

static int MatConvertSameType_MPIDense(Mat,Mat *,int);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,MatRestoreRow_MPIDense,
       MatMult_MPIDense,MatMultAdd_MPIDense,
       MatMultTrans_MPIDense,MatMultTransAdd_MPIDense,
/*       MatSolve_MPIDense,0, */
       0,0,
       0,0,
       0,0,
/*       MatLUFactor_MPIDense,0, */
       0,MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,0,MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
       0,0,
/*       0,MatLUFactorSymbolic_MPIDense,MatLUFactorNumeric_MPIDense, */
       0,0,
       MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,0, MatGetArray_MPIDense, MatRestoreArray_MPIDense,
       0,MatConvertSameType_MPIDense,
       0,0,0,0,0,
       0,0,MatGetValues_MPIDense,0,0,MatScale_MPIDense,
       0,0,0,MatGetBlockSize_MPIDense};

#undef __FUNCTION__
#define __FUNCTION__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated
           if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated
           if n is given)
.  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Currently, the only parallel dense matrix decomposition is by rows,
   so that n=N and each submatrix owns all of the global columns.

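   Example usage (a minimal sketch; global sizes M and N given, local
   sizes left for PETSc to decide):
$      Mat A;
$      ierr = MatCreateMPIDense(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
$                               M,N,PETSC_NULL,&A); CHKERRQ(ierr);
$      /* set entries with MatSetValues(), then assemble */
$      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
$      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
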
.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i,flg;

  /* Note:  For now, when data is specified, this assumes that the user
     correctly allocates the local dense storage space.  We should add
     error checking. */

  *A = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm);
  PLogObjectCreate(mat);
  mat->data       = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy    = MatDestroy_MPIDense;
  mat->view       = MatView_MPIDense;
  mat->factor     = 0;
  mat->mapping    = 0;

  a->factor       = 0;
  a->insertmode   = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}

  /* each row stores all columns */
  if (N == PETSC_DECIDE) N = n;
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  /*  if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported"); */
  a->N = mat->N = N;
  a->M = mat->M = M;
  a->m = mat->m = m;
  a->n = mat->n = n;

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
  MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }

  ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off-processor entries */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);

  /* stuff used for matrix-vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = 1;

  *A = mat;
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatConvertSameType_MPIDense"
static int MatConvertSameType_MPIDense(Mat A,Mat *newmat,int cpvalues)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
  int          ierr;
  FactorCtx    *factor;

  *newmat       = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm);
  PLogObjectCreate(mat);
  mat->data      = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIDense;
  mat->view      = MatView_MPIDense;
  mat->factor    = A->factor;
  mat->assembled = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart     = oldmat->rstart;
  a->rend       = oldmat->rend;
  a->size       = oldmat->size;
  a->rank       = oldmat->rank;
  a->insertmode = NOT_SET_VALUES;

  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  return 0;
}

#include "sys.h"

#undef __FUNCTION__
#define __FUNCTION__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
{
  int        *rowners, i,size,rank,m,rstart,rend,ierr,nz,j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);

  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array); CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into the matrix by row (the file is stored by rows, while
       the array is column-major, which is why we cannot read directly
       into the array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);
    }
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);

    /* insert into the matrix by row (see the note above) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  PetscFree(rowners);
  PetscFree(vals);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatLoad_MPIDense"
int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDense:not a matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2]; nz = header[3];
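  /* binary header layout (read above): header[0] = class cookie,
     header[1] = global rows, header[2] = global columns, header[3] =
     total number of nonzeros (or MATRIX_BINARY_FORMAT_DENSE) */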

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    return MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);
  }

  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in everyone else's and ship them off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDense:something is wrong with the file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDense:something is wrong with the file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}