xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 5cd905551efc76d042fcd2cc746dd1c50567705b)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpidense.c,v 1.82 1998/04/03 23:14:50 bsmith Exp bsmith $";
3 #endif
4 
5 /*
6    Basic functions for basic parallel dense matrices.
7 */
8 
9 #include "src/mat/impls/dense/mpi/mpidense.h"
10 #include "src/vec/vecimpl.h"
11 
12 #undef __FUNC__
13 #define __FUNC__ "MatSetValues_MPIDense"
14 int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
15 {
16   Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
17   int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
18   int          roworiented = A->roworiented;
19 
20   PetscFunctionBegin;
21   for ( i=0; i<m; i++ ) {
22     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
23     if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
24     if (idxm[i] >= rstart && idxm[i] < rend) {
25       row = idxm[i] - rstart;
26       if (roworiented) {
27         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr);
28       } else {
29         for ( j=0; j<n; j++ ) {
30           if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
31           if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
32           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
33         }
34       }
35     } else {
36       if (roworiented) {
37         ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr);
38       } else { /* must stash each seperately */
39         row = idxm[i];
40         for ( j=0; j<n; j++ ) {
41           ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
42         }
43       }
44     }
45   }
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNC__
50 #define __FUNC__ "MatGetValues_MPIDense"
51 int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
52 {
53   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
54   int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;
55 
56   PetscFunctionBegin;
57   for ( i=0; i<m; i++ ) {
58     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
59     if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
60     if (idxm[i] >= rstart && idxm[i] < rend) {
61       row = idxm[i] - rstart;
62       for ( j=0; j<n; j++ ) {
63         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
64         if (idxn[j] >= mdn->N) {
65           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
66         }
67         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr);
68       }
69     } else {
70       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
71     }
72   }
73   PetscFunctionReturn(0);
74 }
75 
76 #undef __FUNC__
77 #define __FUNC__ "MatGetArray_MPIDense"
78 int MatGetArray_MPIDense(Mat A,Scalar **array)
79 {
80   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
81   int          ierr;
82 
83   PetscFunctionBegin;
84   ierr = MatGetArray(a->A,array); CHKERRQ(ierr);
85   PetscFunctionReturn(0);
86 }
87 
88 #undef __FUNC__
89 #define __FUNC__ "MatRestoreArray_MPIDense"
90 int MatRestoreArray_MPIDense(Mat A,Scalar **array)
91 {
92   PetscFunctionBegin;
93   PetscFunctionReturn(0);
94 }
95 
96 #undef __FUNC__
97 #define __FUNC__ "MatAssemblyBegin_MPIDense"
98 int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
99 {
100   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
101   MPI_Comm     comm = mat->comm;
102   int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
103   int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
104   int          tag = mat->tag, *owner,*starts,count,ierr;
105   InsertMode   addv;
106   MPI_Request  *send_waits,*recv_waits;
107   Scalar       *rvalues,*svalues;
108 
109   PetscFunctionBegin;
110   /* make sure all processors are either in INSERTMODE or ADDMODE */
111   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
112   if (addv == (ADD_VALUES|INSERT_VALUES)) {
113     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
114   }
115   mat->insertmode = addv; /* in case this processor had no cache */
116 
117   /*  first count number of contributors to each processor */
118   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
119   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
120   owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
121   for ( i=0; i<mdn->stash.n; i++ ) {
122     idx = mdn->stash.idx[i];
123     for ( j=0; j<size; j++ ) {
124       if (idx >= owners[j] && idx < owners[j+1]) {
125         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
126       }
127     }
128   }
129   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
130 
131   /* inform other processors of number of messages and max length*/
132   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
133   ierr = MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
134   nreceives = work[rank];
135   if (nreceives > size) SETERRQ(PETSC_ERR_PLIB,0,"Internal PETSc error");
136   ierr = MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
137   nmax = work[rank];
138   PetscFree(work);
139 
140   /* post receives:
141        1) each message will consist of ordered pairs
142      (global index,value) we store the global index as a double
143      to simplify the message passing.
144        2) since we don't know how long each individual message is we
145      allocate the largest needed buffer for each receive. Potentially
146      this is a lot of wasted space.
147 
148        This could be done better.
149   */
150   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
151   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
152   for ( i=0; i<nreceives; i++ ) {
153     ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
154   }
155 
156   /* do sends:
157       1) starts[i] gives the starting index in svalues for stuff going to
158          the ith processor
159   */
160   svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
161   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
162   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
163   starts[0] = 0;
164   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
165   for ( i=0; i<mdn->stash.n; i++ ) {
166     svalues[3*starts[owner[i]]]       = (Scalar)  mdn->stash.idx[i];
167     svalues[3*starts[owner[i]]+1]     = (Scalar)  mdn->stash.idy[i];
168     svalues[3*(starts[owner[i]]++)+2] =  mdn->stash.array[i];
169   }
170   PetscFree(owner);
171   starts[0] = 0;
172   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
173   count = 0;
174   for ( i=0; i<size; i++ ) {
175     if (procs[i]) {
176       ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
177     }
178   }
179   PetscFree(starts); PetscFree(nprocs);
180 
181   /* Free cache space */
182   PLogInfo(mat,"MatAssemblyBegin_MPIDense:Number of off-processor values %d\n",mdn->stash.n);
183   ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);
184 
185   mdn->svalues    = svalues;    mdn->rvalues = rvalues;
186   mdn->nsends     = nsends;     mdn->nrecvs = nreceives;
187   mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
188   mdn->rmax       = nmax;
189 
190   PetscFunctionReturn(0);
191 }
192 extern int MatSetUpMultiply_MPIDense(Mat);
193 
194 #undef __FUNC__
195 #define __FUNC__ "MatAssemblyEnd_MPIDense"
196 int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
197 {
198   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
199   MPI_Status   *send_status,recv_status;
200   int          imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col;
201   Scalar       *values,val;
202   InsertMode   addv = mat->insertmode;
203 
204   PetscFunctionBegin;
205   /*  wait on receives */
206   while (count) {
207     ierr = MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
208     /* unpack receives into our local space */
209     values = mdn->rvalues + 3*imdex*mdn->rmax;
210     ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
211     n = n/3;
212     for ( i=0; i<n; i++ ) {
213       row = (int) PetscReal(values[3*i]) - mdn->rstart;
214       col = (int) PetscReal(values[3*i+1]);
215       val = values[3*i+2];
216       if (col >= 0 && col < mdn->N) {
217         MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
218       }
219       else {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid column");}
220     }
221     count--;
222   }
223   PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues);
224 
225   /* wait on sends */
226   if (mdn->nsends) {
227     send_status = (MPI_Status *) PetscMalloc(mdn->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
228     ierr        = MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);CHKERRQ(ierr);
229     PetscFree(send_status);
230   }
231   PetscFree(mdn->send_waits); PetscFree(mdn->svalues);
232 
233   ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
234   ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);
235 
236   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
237     ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
238   }
239   PetscFunctionReturn(0);
240 }
241 
242 #undef __FUNC__
243 #define __FUNC__ "MatZeroEntries_MPIDense"
244 int MatZeroEntries_MPIDense(Mat A)
245 {
246   int          ierr;
247   Mat_MPIDense *l = (Mat_MPIDense *) A->data;
248 
249   PetscFunctionBegin;
250   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
251   PetscFunctionReturn(0);
252 }
253 
254 #undef __FUNC__
255 #define __FUNC__ "MatGetBlockSize_MPIDense"
256 int MatGetBlockSize_MPIDense(Mat A,int *bs)
257 {
258   PetscFunctionBegin;
259   *bs = 1;
260   PetscFunctionReturn(0);
261 }
262 
263 /* the code does not do the diagonal entries correctly unless the
264    matrix is square and the column and row owerships are identical.
265    This is a BUG. The only way to fix it seems to be to access
266    mdn->A and mdn->B directly and not through the MatZeroRows()
267    routine.
268 */
269 #undef __FUNC__
270 #define __FUNC__ "MatZeroRows_MPIDense"
271 int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
272 {
273   Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
274   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
275   int            *procs,*nprocs,j,found,idx,nsends,*work;
276   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
277   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
278   int            *lens,imdex,*lrows,*values;
279   MPI_Comm       comm = A->comm;
280   MPI_Request    *send_waits,*recv_waits;
281   MPI_Status     recv_status,*send_status;
282   IS             istmp;
283 
284   PetscFunctionBegin;
285   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
286   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
287 
288   /*  first count number of contributors to each processor */
289   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
290   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
291   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
292   for ( i=0; i<N; i++ ) {
293     idx = rows[i];
294     found = 0;
295     for ( j=0; j<size; j++ ) {
296       if (idx >= owners[j] && idx < owners[j+1]) {
297         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
298       }
299     }
300     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
301   }
302   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
303 
304   /* inform other processors of number of messages and max length*/
305   work   = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
306   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
307   nrecvs = work[rank];
308   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
309   nmax   = work[rank];
310   PetscFree(work);
311 
312   /* post receives:   */
313   rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
314   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
315   for ( i=0; i<nrecvs; i++ ) {
316     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
317   }
318 
319   /* do sends:
320       1) starts[i] gives the starting index in svalues for stuff going to
321          the ith processor
322   */
323   svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
324   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
325   starts     = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
326   starts[0]  = 0;
327   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
328   for ( i=0; i<N; i++ ) {
329     svalues[starts[owner[i]]++] = rows[i];
330   }
331   ISRestoreIndices(is,&rows);
332 
333   starts[0] = 0;
334   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
335   count = 0;
336   for ( i=0; i<size; i++ ) {
337     if (procs[i]) {
338       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
339     }
340   }
341   PetscFree(starts);
342 
343   base = owners[rank];
344 
345   /*  wait on receives */
346   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
347   source = lens + nrecvs;
348   count  = nrecvs; slen = 0;
349   while (count) {
350     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
351     /* unpack receives into our local space */
352     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
353     source[imdex]  = recv_status.MPI_SOURCE;
354     lens[imdex]  = n;
355     slen += n;
356     count--;
357   }
358   PetscFree(recv_waits);
359 
360   /* move the data into the send scatter */
361   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
362   count = 0;
363   for ( i=0; i<nrecvs; i++ ) {
364     values = rvalues + i*nmax;
365     for ( j=0; j<lens[i]; j++ ) {
366       lrows[count++] = values[j] - base;
367     }
368   }
369   PetscFree(rvalues); PetscFree(lens);
370   PetscFree(owner); PetscFree(nprocs);
371 
372   /* actually zap the local rows */
373   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
374   PLogObjectParent(A,istmp);
375   PetscFree(lrows);
376   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
377   ierr = ISDestroy(istmp); CHKERRQ(ierr);
378 
379   /* wait on sends */
380   if (nsends) {
381     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
382     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
383     PetscFree(send_status);
384   }
385   PetscFree(send_waits); PetscFree(svalues);
386 
387   PetscFunctionReturn(0);
388 }
389 
390 #undef __FUNC__
391 #define __FUNC__ "MatMult_MPIDense"
392 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
393 {
394   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
395   int          ierr;
396 
397   PetscFunctionBegin;
398   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
399   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
400   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
401   PetscFunctionReturn(0);
402 }
403 
404 #undef __FUNC__
405 #define __FUNC__ "MatMultAdd_MPIDense"
406 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
407 {
408   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
409   int          ierr;
410 
411   PetscFunctionBegin;
412   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
413   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
414   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
415   PetscFunctionReturn(0);
416 }
417 
418 #undef __FUNC__
419 #define __FUNC__ "MatMultTrans_MPIDense"
420 int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
421 {
422   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
423   int          ierr;
424   Scalar       zero = 0.0;
425 
426   PetscFunctionBegin;
427   ierr = VecSet(&zero,yy); CHKERRQ(ierr);
428   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
429   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
430   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
431   PetscFunctionReturn(0);
432 }
433 
434 #undef __FUNC__
435 #define __FUNC__ "MatMultTransAdd_MPIDense"
436 int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
437 {
438   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
439   int          ierr;
440 
441   PetscFunctionBegin;
442   ierr = VecCopy(yy,zz); CHKERRQ(ierr);
443   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
444   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
445   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
446   PetscFunctionReturn(0);
447 }
448 
449 #undef __FUNC__
450 #define __FUNC__ "MatGetDiagonal_MPIDense"
451 int MatGetDiagonal_MPIDense(Mat A,Vec v)
452 {
453   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
454   Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
455   int          ierr, len, i, n, m = a->m, radd;
456   Scalar       *x, zero = 0.0;
457 
458   PetscFunctionBegin;
459   VecSet(&zero,v);
460   ierr = VecGetArray(v,&x); CHKERRQ(ierr);
461   ierr = VecGetSize(v,&n); CHKERRQ(ierr);
462   if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
463   len = PetscMin(aloc->m,aloc->n);
464   radd = a->rstart*m;
465   for ( i=0; i<len; i++ ) {
466     x[i] = aloc->v[radd + i*m + i];
467   }
468   PetscFunctionReturn(0);
469 }
470 
471 #undef __FUNC__
472 #define __FUNC__ "MatDestroy_MPIDense"
473 int MatDestroy_MPIDense(Mat mat)
474 {
475   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
476   int          ierr;
477 
478   PetscFunctionBegin;
479 #if defined(USE_PETSC_LOG)
480   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
481 #endif
482   PetscFree(mdn->rowners);
483   ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
484   if (mdn->lvec)   VecDestroy(mdn->lvec);
485   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
486   if (mdn->factor) {
487     if (mdn->factor->temp)   PetscFree(mdn->factor->temp);
488     if (mdn->factor->tag)    PetscFree(mdn->factor->tag);
489     if (mdn->factor->pivots) PetscFree(mdn->factor->pivots);
490     PetscFree(mdn->factor);
491   }
492   PetscFree(mdn);
493   if (mat->mapping) {
494     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
495   }
496   PLogObjectDestroy(mat);
497   PetscHeaderDestroy(mat);
498   PetscFunctionReturn(0);
499 }
500 
501 #include "pinclude/pviewer.h"
502 
503 #undef __FUNC__
504 #define __FUNC__ "MatView_MPIDense_Binary"
505 static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
506 {
507   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
508   int          ierr;
509 
510   PetscFunctionBegin;
511   if (mdn->size == 1) {
512     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
513   }
514   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
515   PetscFunctionReturn(0);
516 }
517 
518 #undef __FUNC__
519 #define __FUNC__ "MatView_MPIDense_ASCII"
520 static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
521 {
522   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
523   int          ierr, format;
524   FILE         *fd;
525   ViewerType   vtype;
526 
527   PetscFunctionBegin;
528   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
529   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
530   ierr = ViewerGetFormat(viewer,&format);
531   if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
532     int rank;
533     MatInfo info;
534     MPI_Comm_rank(mat->comm,&rank);
535     ierr = MatGetInfo(mat,MAT_LOCAL,&info);
536     PetscSequentialPhaseBegin(mat->comm,1);
537       fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
538          (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
539       fflush(fd);
540     PetscSequentialPhaseEnd(mat->comm,1);
541     ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
542     PetscFunctionReturn(0);
543   }
544   else if (format == VIEWER_FORMAT_ASCII_INFO) {
545     PetscFunctionReturn(0);
546   }
547   if (vtype == ASCII_FILE_VIEWER) {
548     PetscSequentialPhaseBegin(mat->comm,1);
549     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
550              mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
551     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
552     fflush(fd);
553     PetscSequentialPhaseEnd(mat->comm,1);
554   } else {
555     int size = mdn->size, rank = mdn->rank;
556     if (size == 1) {
557       ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
558     } else {
559       /* assemble the entire matrix onto first processor. */
560       Mat          A;
561       int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
562       Scalar       *vals;
563       Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;
564 
565       if (!rank) {
566         ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
567       } else {
568         ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
569       }
570       PLogObjectParent(mat,A);
571 
572       /* Copy the matrix ... This isn't the most efficient means,
573          but it's quick for now */
574       row = mdn->rstart; m = Amdn->m;
575       for ( i=0; i<m; i++ ) {
576         ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
577         ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
578         ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
579         row++;
580       }
581 
582       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
583       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
584       if (!rank) {
585         ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
586       }
587       ierr = MatDestroy(A); CHKERRQ(ierr);
588     }
589   }
590   PetscFunctionReturn(0);
591 }
592 
593 #undef __FUNC__
594 #define __FUNC__ "MatView_MPIDense"
595 int MatView_MPIDense(Mat mat,Viewer viewer)
596 {
597   int          ierr;
598   ViewerType   vtype;
599 
600   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
601   if (vtype == ASCII_FILE_VIEWER) {
602     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
603   } else if (vtype == ASCII_FILES_VIEWER) {
604     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
605   } else if (vtype == BINARY_FILE_VIEWER) {
606     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
607   } else {
608     SETERRQ(1,1,"Viewer type not supported by PETSc object");
609   }
610   PetscFunctionReturn(0);
611 }
612 
613 #undef __FUNC__
614 #define __FUNC__ "MatGetInfo_MPIDense"
615 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
616 {
617   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
618   Mat          mdn = mat->A;
619   int          ierr;
620   double       isend[5], irecv[5];
621 
622   PetscFunctionBegin;
623   info->rows_global    = (double)mat->M;
624   info->columns_global = (double)mat->N;
625   info->rows_local     = (double)mat->m;
626   info->columns_local  = (double)mat->N;
627   info->block_size     = 1.0;
628   ierr = MatGetInfo(mdn,MAT_LOCAL,info); CHKERRQ(ierr);
629   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
630   isend[3] = info->memory;  isend[4] = info->mallocs;
631   if (flag == MAT_LOCAL) {
632     info->nz_used      = isend[0];
633     info->nz_allocated = isend[1];
634     info->nz_unneeded  = isend[2];
635     info->memory       = isend[3];
636     info->mallocs      = isend[4];
637   } else if (flag == MAT_GLOBAL_MAX) {
638     ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,A->comm);CHKERRQ(ierr);
639     info->nz_used      = irecv[0];
640     info->nz_allocated = irecv[1];
641     info->nz_unneeded  = irecv[2];
642     info->memory       = irecv[3];
643     info->mallocs      = irecv[4];
644   } else if (flag == MAT_GLOBAL_SUM) {
645     ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,A->comm);CHKERRQ(ierr);
646     info->nz_used      = irecv[0];
647     info->nz_allocated = irecv[1];
648     info->nz_unneeded  = irecv[2];
649     info->memory       = irecv[3];
650     info->mallocs      = irecv[4];
651   }
652   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
653   info->fill_ratio_needed = 0;
654   info->factor_mallocs    = 0;
655   PetscFunctionReturn(0);
656 }
657 
658 /* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
659    extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
660    extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
661    extern int MatSolve_MPIDense(Mat,Vec,Vec);
662    extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
663    extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
664    extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */
665 
666 #undef __FUNC__
667 #define __FUNC__ "MatSetOption_MPIDense"
668 int MatSetOption_MPIDense(Mat A,MatOption op)
669 {
670   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
671 
672   PetscFunctionBegin;
673   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
674       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
675       op == MAT_NEW_NONZERO_LOCATION_ERROR ||
676       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
677       op == MAT_COLUMNS_SORTED ||
678       op == MAT_COLUMNS_UNSORTED) {
679         MatSetOption(a->A,op);
680   } else if (op == MAT_ROW_ORIENTED) {
681         a->roworiented = 1;
682         MatSetOption(a->A,op);
683   } else if (op == MAT_ROWS_SORTED ||
684              op == MAT_ROWS_UNSORTED ||
685              op == MAT_SYMMETRIC ||
686              op == MAT_STRUCTURALLY_SYMMETRIC ||
687              op == MAT_YES_NEW_DIAGONALS ||
688              op == MAT_USE_HASH_TABLE) {
689     PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
690   } else if (op == MAT_COLUMN_ORIENTED) {
691     a->roworiented = 0; MatSetOption(a->A,op);
692   } else if (op == MAT_NO_NEW_DIAGONALS) {
693     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
694   } else {
695     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
696   }
697   PetscFunctionReturn(0);
698 }
699 
700 #undef __FUNC__
701 #define __FUNC__ "MatGetSize_MPIDense"
702 int MatGetSize_MPIDense(Mat A,int *m,int *n)
703 {
704   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
705 
706   PetscFunctionBegin;
707   *m = mat->M; *n = mat->N;
708   PetscFunctionReturn(0);
709 }
710 
711 #undef __FUNC__
712 #define __FUNC__ "MatGetLocalSize_MPIDense"
713 int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
714 {
715   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
716 
717   PetscFunctionBegin;
718   *m = mat->m; *n = mat->N;
719   PetscFunctionReturn(0);
720 }
721 
722 #undef __FUNC__
723 #define __FUNC__ "MatGetOwnershipRange_MPIDense"
724 int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
725 {
726   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
727 
728   PetscFunctionBegin;
729   *m = mat->rstart; *n = mat->rend;
730   PetscFunctionReturn(0);
731 }
732 
733 #undef __FUNC__
734 #define __FUNC__ "MatGetRow_MPIDense"
735 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
736 {
737   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
738   int          lrow, rstart = mat->rstart, rend = mat->rend,ierr;
739 
740   PetscFunctionBegin;
741   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows")
742   lrow = row - rstart;
743   ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
744   PetscFunctionReturn(0);
745 }
746 
747 #undef __FUNC__
748 #define __FUNC__ "MatRestoreRow_MPIDense"
749 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
750 {
751   PetscFunctionBegin;
752   if (idx) PetscFree(*idx);
753   if (v) PetscFree(*v);
754   PetscFunctionReturn(0);
755 }
756 
757 #undef __FUNC__
758 #define __FUNC__ "MatNorm_MPIDense"
759 int MatNorm_MPIDense(Mat A,NormType type,double *norm)
760 {
761   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
762   Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
763   int          ierr, i, j;
764   double       sum = 0.0;
765   Scalar       *v = mat->v;
766 
767   PetscFunctionBegin;
768   if (mdn->size == 1) {
769     ierr =  MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
770   } else {
771     if (type == NORM_FROBENIUS) {
772       for (i=0; i<mat->n*mat->m; i++ ) {
773 #if defined(USE_PETSC_COMPLEX)
774         sum += real(conj(*v)*(*v)); v++;
775 #else
776         sum += (*v)*(*v); v++;
777 #endif
778       }
779       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
780       *norm = sqrt(*norm);
781       PLogFlops(2*mat->n*mat->m);
782     } else if (type == NORM_1) {
783       double *tmp, *tmp2;
784       tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
785       tmp2 = tmp + mdn->N;
786       PetscMemzero(tmp,2*mdn->N*sizeof(double));
787       *norm = 0.0;
788       v = mat->v;
789       for ( j=0; j<mat->n; j++ ) {
790         for ( i=0; i<mat->m; i++ ) {
791           tmp[j] += PetscAbsScalar(*v);  v++;
792         }
793       }
794       ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
795       for ( j=0; j<mdn->N; j++ ) {
796         if (tmp2[j] > *norm) *norm = tmp2[j];
797       }
798       PetscFree(tmp);
799       PLogFlops(mat->n*mat->m);
800     } else if (type == NORM_INFINITY) { /* max row norm */
801       double ntemp;
802       ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
803       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
804     } else {
805       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
806     }
807   }
808   PetscFunctionReturn(0);
809 }
810 
811 #undef __FUNC__
812 #define __FUNC__ "MatTranspose_MPIDense"
813 int MatTranspose_MPIDense(Mat A,Mat *matout)
814 {
815   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
816   Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
817   Mat          B;
818   int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
819   int          j, i, ierr;
820   Scalar       *v;
821 
822   PetscFunctionBegin;
823   if (matout == PETSC_NULL && M != N) {
824     SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place");
825   }
826   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);
827 
828   m = Aloc->m; n = Aloc->n; v = Aloc->v;
829   rwork = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(rwork);
830   for ( j=0; j<n; j++ ) {
831     for (i=0; i<m; i++) rwork[i] = rstart + i;
832     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
833     v   += m;
834   }
835   PetscFree(rwork);
836   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
837   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
838   if (matout != PETSC_NULL) {
839     *matout = B;
840   } else {
841     PetscOps       *Abops;
842     struct _MatOps *Aops;
843 
844     /* This isn't really an in-place transpose, but free data struct from a */
845     PetscFree(a->rowners);
846     ierr = MatDestroy(a->A); CHKERRQ(ierr);
847     if (a->lvec) VecDestroy(a->lvec);
848     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
849     PetscFree(a);
850 
851     /*
852          This is horrible, horrible code. We need to keep the
853       A pointers for the bops and ops but copy everything
854       else from C.
855     */
856     Abops = A->bops;
857     Aops  = A->ops;
858     PetscMemcpy(A,B,sizeof(struct _p_Mat));
859     A->bops = Abops;
860     A->ops  = Aops;
861 
862     PetscHeaderDestroy(B);
863   }
864   PetscFunctionReturn(0);
865 }
866 
867 #include "pinclude/plapack.h"
868 #undef __FUNC__
869 #define __FUNC__ "MatScale_MPIDense"
870 int MatScale_MPIDense(Scalar *alpha,Mat inA)
871 {
872   Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
873   Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
874   int          one = 1, nz;
875 
876   PetscFunctionBegin;
877   nz = a->m*a->n;
878   BLscal_( &nz, alpha, a->v, &one );
879   PLogFlops(nz);
880   PetscFunctionReturn(0);
881 }
882 
883 static int MatConvertSameType_MPIDense(Mat,Mat *,int);
884 
885 /* -------------------------------------------------------------------*/
886 static struct _MatOps MatOps = {MatSetValues_MPIDense,
887        MatGetRow_MPIDense,MatRestoreRow_MPIDense,
888        MatMult_MPIDense,MatMultAdd_MPIDense,
889        MatMultTrans_MPIDense,MatMultTransAdd_MPIDense,
890 /*       MatSolve_MPIDense,0, */
891        0,0,
892        0,0,
893        0,0,
894 /*       MatLUFactor_MPIDense,0, */
895        0,MatTranspose_MPIDense,
896        MatGetInfo_MPIDense,0,
897        MatGetDiagonal_MPIDense,0,MatNorm_MPIDense,
898        MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
899        0,
900        MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
901        0,0,
902 /*       0,MatLUFactorSymbolic_MPIDense,MatLUFactorNumeric_MPIDense, */
903        0,0,
904        MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
905        MatGetOwnershipRange_MPIDense,
906        0,0, MatGetArray_MPIDense, MatRestoreArray_MPIDense,
907        MatConvertSameType_MPIDense,
908        0,0,0,0,0,
909        0,0,MatGetValues_MPIDense,0,0,MatScale_MPIDense,
910        0,0,0,MatGetBlockSize_MPIDense};
911 
912 #undef __FUNC__
913 #define __FUNC__ "MatCreateMPIDense"
914 /*@C
915    MatCreateMPIDense - Creates a sparse parallel matrix in dense format.
916 
917    Input Parameters:
918 .  comm - MPI communicator
919 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
920 .  n - number of local columns (or PETSC_DECIDE to have calculated
921            if N is given)
922 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
923 .  N - number of global columns (or PETSC_DECIDE to have calculated
924            if n is given)
925 .  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
926    to control all matrix memory allocation.
927 
928    Output Parameter:
929 .  A - the matrix
930 
931    Notes:
932    The dense format is fully compatible with standard Fortran 77
933    storage by columns.
934 
935    The data input variable is intended primarily for Fortran programmers
936    who wish to allocate their own matrix memory space.  Most users should
937    set data=PETSC_NULL.
938 
939    The user MUST specify either the local or global matrix dimensions
940    (possibly both).
941 
942    Currently, the only parallel dense matrix decomposition is by rows,
943    so that n=N and each submatrix owns all of the global columns.
944 
945 .keywords: matrix, dense, parallel
946 
947 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
948 @*/
949 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
950 {
951   Mat          mat;
952   Mat_MPIDense *a;
953   int          ierr, i,flg;
954 
955   PetscFunctionBegin;
956   /* Note:  For now, when data is specified above, this assumes the user correctly
957    allocates the local dense storage space.  We should add error checking. */
958 
959   *A = 0;
960   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,comm,MatDestroy,MatView);
961   PLogObjectCreate(mat);
962   mat->data       = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
963   PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps));
964   mat->ops->destroy    = MatDestroy_MPIDense;
965   mat->ops->view       = MatView_MPIDense;
966   mat->factor          = 0;
967   mat->mapping         = 0;
968 
969   a->factor       = 0;
970   mat->insertmode = NOT_SET_VALUES;
971   MPI_Comm_rank(comm,&a->rank);
972   MPI_Comm_size(comm,&a->size);
973 
974   if (M == PETSC_DECIDE) {ierr = MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);}
975   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
976 
977   /* each row stores all columns */
978   if (N == PETSC_DECIDE) N = n;
979   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
980   /*  if (n != N) SETERRQ(PETSC_ERR_SUP,0,"For now, only n=N is supported"); */
981   a->N = mat->N = N;
982   a->M = mat->M = M;
983   a->m = mat->m = m;
984   a->n = mat->n = n;
985 
986   /* build local table of row and column ownerships */
987   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
988   a->cowners = a->rowners + a->size + 1;
989   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
990   ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
991   a->rowners[0] = 0;
992   for ( i=2; i<=a->size; i++ ) {
993     a->rowners[i] += a->rowners[i-1];
994   }
995   a->rstart = a->rowners[a->rank];
996   a->rend   = a->rowners[a->rank+1];
997   ierr      = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
998   a->cowners[0] = 0;
999   for ( i=2; i<=a->size; i++ ) {
1000     a->cowners[i] += a->cowners[i-1];
1001   }
1002 
1003   ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr);
1004   PLogObjectParent(mat,a->A);
1005 
1006   /* build cache for off array entries formed */
1007   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
1008 
1009   /* stuff used for matrix vector multiply */
1010   a->lvec        = 0;
1011   a->Mvctx       = 0;
1012   a->roworiented = 1;
1013 
1014   *A = mat;
1015   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1016   if (flg) {
1017     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1018   }
1019   PetscFunctionReturn(0);
1020 }
1021 
1022 #undef __FUNC__
1023 #define __FUNC__ "MatConvertSameType_MPIDense"
1024 static int MatConvertSameType_MPIDense(Mat A,Mat *newmat,int cpvalues)
1025 {
1026   Mat          mat;
1027   Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
1028   int          ierr;
1029   FactorCtx    *factor;
1030 
1031   PetscFunctionBegin;
1032   *newmat       = 0;
1033   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,A->comm,MatDestroy,MatView);
1034   PLogObjectCreate(mat);
1035   mat->data      = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
1036   PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps));
1037   mat->ops->destroy   = MatDestroy_MPIDense;
1038   mat->ops->view      = MatView_MPIDense;
1039   mat->factor         = A->factor;
1040   mat->assembled      = PETSC_TRUE;
1041 
1042   a->m = mat->m = oldmat->m;
1043   a->n = mat->n = oldmat->n;
1044   a->M = mat->M = oldmat->M;
1045   a->N = mat->N = oldmat->N;
1046   if (oldmat->factor) {
1047     a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor);
1048     /* copy factor contents ... add this code! */
1049   } else a->factor = 0;
1050 
1051   a->rstart       = oldmat->rstart;
1052   a->rend         = oldmat->rend;
1053   a->size         = oldmat->size;
1054   a->rank         = oldmat->rank;
1055   mat->insertmode = NOT_SET_VALUES;
1056 
1057   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
1058   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1059   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1060   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1061 
1062   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1063   PLogObjectParent(mat,a->lvec);
1064   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1065   PLogObjectParent(mat,a->Mvctx);
1066   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1067   PLogObjectParent(mat,a->A);
1068   *newmat = mat;
1069   PetscFunctionReturn(0);
1070 }
1071 
1072 #include "sys.h"
1073 
1074 #undef __FUNC__
1075 #define __FUNC__ "MatLoad_MPIDense_DenseInFile"
1076 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
1077 {
1078   int        *rowners, i,size,rank,m,ierr,nz,j;
1079   Scalar     *array,*vals,*vals_ptr;
1080   MPI_Status status;
1081 
1082   PetscFunctionBegin;
1083   MPI_Comm_rank(comm,&rank);
1084   MPI_Comm_size(comm,&size);
1085 
1086   /* determine ownership of all rows */
1087   m          = M/size + ((M % size) > rank);
1088   rowners    = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1089   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1090   rowners[0] = 0;
1091   for ( i=2; i<=size; i++ ) {
1092     rowners[i] += rowners[i-1];
1093   }
1094 
1095   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1096   ierr = MatGetArray(*newmat,&array); CHKERRQ(ierr);
1097 
1098   if (!rank) {
1099     vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);
1100 
1101     /* read in my part of the matrix numerical values  */
1102     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR); CHKERRQ(ierr);
1103 
1104     /* insert into matrix-by row (this is why cannot directly read into array */
1105     vals_ptr = vals;
1106     for ( i=0; i<m; i++ ) {
1107       for ( j=0; j<N; j++ ) {
1108         array[i + j*m] = *vals_ptr++;
1109       }
1110     }
1111 
1112     /* read in other processors and ship out */
1113     for ( i=1; i<size; i++ ) {
1114       nz   = (rowners[i+1] - rowners[i])*N;
1115       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1116       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1117     }
1118   } else {
1119     /* receive numeric values */
1120     vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);
1121 
1122     /* receive message of values*/
1123     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1124 
1125     /* insert into matrix-by row (this is why cannot directly read into array */
1126     vals_ptr = vals;
1127     for ( i=0; i<m; i++ ) {
1128       for ( j=0; j<N; j++ ) {
1129         array[i + j*m] = *vals_ptr++;
1130       }
1131     }
1132   }
1133   PetscFree(rowners);
1134   PetscFree(vals);
1135   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1136   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1137   PetscFunctionReturn(0);
1138 }
1139 
1140 
1141 #undef __FUNC__
1142 #define __FUNC__ "MatLoad_MPIDense"
1143 int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
1144 {
1145   Mat          A;
1146   Scalar       *vals,*svals;
1147   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1148   MPI_Status   status;
1149   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1150   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1151   int          tag = ((PetscObject)viewer)->tag;
1152   int          i, nz, ierr, j,rstart, rend, fd;
1153 
1154   PetscFunctionBegin;
1155   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1156   if (!rank) {
1157     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1158     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
1159     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1160   }
1161 
1162   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1163   M = header[1]; N = header[2]; nz = header[3];
1164 
1165   /*
1166        Handle case where matrix is stored on disk as a dense matrix
1167   */
1168   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1169     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1170     PetscFunctionReturn(0);
1171   }
1172 
1173   /* determine ownership of all rows */
1174   m          = M/size + ((M % size) > rank);
1175   rowners    = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1176   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1177   rowners[0] = 0;
1178   for ( i=2; i<=size; i++ ) {
1179     rowners[i] += rowners[i-1];
1180   }
1181   rstart = rowners[rank];
1182   rend   = rowners[rank+1];
1183 
1184   /* distribute row lengths to all processors */
1185   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1186   offlens = ourlens + (rend-rstart);
1187   if (!rank) {
1188     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1189     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
1190     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1191     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1192     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1193     PetscFree(sndcounts);
1194   } else {
1195     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
1196   }
1197 
1198   if (!rank) {
1199     /* calculate the number of nonzeros on each processor */
1200     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1201     PetscMemzero(procsnz,size*sizeof(int));
1202     for ( i=0; i<size; i++ ) {
1203       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1204         procsnz[i] += rowlengths[j];
1205       }
1206     }
1207     PetscFree(rowlengths);
1208 
1209     /* determine max buffer needed and allocate it */
1210     maxnz = 0;
1211     for ( i=0; i<size; i++ ) {
1212       maxnz = PetscMax(maxnz,procsnz[i]);
1213     }
1214     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1215 
1216     /* read in my part of the matrix column indices  */
1217     nz = procsnz[0];
1218     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1219     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
1220 
1221     /* read in every one elses and ship off */
1222     for ( i=1; i<size; i++ ) {
1223       nz   = procsnz[i];
1224       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
1225       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1226     }
1227     PetscFree(cols);
1228   } else {
1229     /* determine buffer space needed for message */
1230     nz = 0;
1231     for ( i=0; i<m; i++ ) {
1232       nz += ourlens[i];
1233     }
1234     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1235 
1236     /* receive message of column indices*/
1237     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1238     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1239     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1240   }
1241 
1242   /* loop over local rows, determining number of off diagonal entries */
1243   PetscMemzero(offlens,m*sizeof(int));
1244   jj = 0;
1245   for ( i=0; i<m; i++ ) {
1246     for ( j=0; j<ourlens[i]; j++ ) {
1247       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1248       jj++;
1249     }
1250   }
1251 
1252   /* create our matrix */
1253   for ( i=0; i<m; i++ ) {
1254     ourlens[i] -= offlens[i];
1255   }
1256   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1257   A = *newmat;
1258   for ( i=0; i<m; i++ ) {
1259     ourlens[i] += offlens[i];
1260   }
1261 
1262   if (!rank) {
1263     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1264 
1265     /* read in my part of the matrix numerical values  */
1266     nz = procsnz[0];
1267     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1268 
1269     /* insert into matrix */
1270     jj      = rstart;
1271     smycols = mycols;
1272     svals   = vals;
1273     for ( i=0; i<m; i++ ) {
1274       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1275       smycols += ourlens[i];
1276       svals   += ourlens[i];
1277       jj++;
1278     }
1279 
1280     /* read in other processors and ship out */
1281     for ( i=1; i<size; i++ ) {
1282       nz   = procsnz[i];
1283       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1284       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1285     }
1286     PetscFree(procsnz);
1287   } else {
1288     /* receive numeric values */
1289     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1290 
1291     /* receive message of values*/
1292     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1293     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1294     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1295 
1296     /* insert into matrix */
1297     jj      = rstart;
1298     smycols = mycols;
1299     svals   = vals;
1300     for ( i=0; i<m; i++ ) {
1301       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1302       smycols += ourlens[i];
1303       svals   += ourlens[i];
1304       jj++;
1305     }
1306   }
1307   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1308 
1309   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1310   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1311   PetscFunctionReturn(0);
1312 }
1313 
1314 
1315 
1316 
1317 
1318