#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpidense.c,v 1.81 1998/03/16 18:55:07 bsmith Exp bsmith $";
#endif

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
  int          roworiented = A->roworiented;

  PetscFunctionBegin;
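  /* values destined for locally owned rows go directly into the local
     sequential dense matrix; values for rows owned by other processes
     are stashed and communicated during MatAssemblyBegin/End() */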
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr);
      } else {
        for ( j=0; j<n; j++ ) {
          if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
          if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
        }
      }
    } else {
      if (roworiented) {
        ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr);
      } else { /* must stash each separately */
        row = idxm[i];
        for ( j=0; j<n; j++ ) {
          ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= mdn->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
  int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int          tag = mat->tag, *owner,*starts,count,ierr;
  InsertMode   addv;
  MPI_Request  *send_waits,*recv_waits;
  Scalar       *rvalues,*svalues;

  PetscFunctionBegin;
  /* make sure all processors are using the same mode (INSERT_VALUES or ADD_VALUES) */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /*  first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<mdn->stash.n; i++ ) {
    idx = mdn->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr = MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nreceives = work[rank];
  if (nreceives > size) SETERRQ(PETSC_ERR_PLIB,0,"Internal PETSc error");
  ierr = MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message consists of ordered triples
     (global row index, global column index, value); the indices are
     stored as Scalars to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.

       This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<mdn->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar)  mdn->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar)  mdn->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] =  mdn->stash.array[i];
  }
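  /* note: the packing loop above advances starts[]; it is rebuilt below so
     that starts[i] once again gives the send offset for the ith processor */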
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(mat,"MatAssemblyBegin_MPIDense:Number of off-processor values %d\n",mdn->stash.n);
  ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);

  mdn->svalues    = svalues;    mdn->rvalues = rvalues;
  mdn->nsends     = nsends;     mdn->nrecvs = nreceives;
  mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
  mdn->rmax       = nmax;

  PetscFunctionReturn(0);
}
extern int MatSetUpMultiply_MPIDense(Mat);

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Status   *send_status,recv_status;
  int          imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col;
  Scalar       *values,val;
  InsertMode   addv = mat->insertmode;

  PetscFunctionBegin;
  /*  wait on receives */
  while (count) {
    ierr = MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    values = mdn->rvalues + 3*imdex*mdn->rmax;
    ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
    n = n/3;
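    /* each of the n received entries is a (global row, global column, value)
       triple of Scalars */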
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - mdn->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= 0 && col < mdn->N) {
        ierr = MatSetValues(mdn->A,1,&row,1,&col,&val,addv);CHKERRQ(ierr);
      }
      else {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid column");}
    }
    count--;
  }
  PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues);

  /* wait on sends */
  if (mdn->nsends) {
    send_status = (MPI_Status *) PetscMalloc(mdn->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(mdn->send_waits); PetscFree(mdn->svalues);

  ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   the local matrix mdn->A directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  PetscFunctionBegin;
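  /* each process ships the global indices of the rows it wants zeroed to
     the owning processes, then zeros whatever rows of its local matrix
     were requested (by itself or by others) */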
  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work   = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  PetscFree(work);

  /* post receives:   */
  rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows); CHKERRQ(ierr);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /*  wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
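  /* gather the entire input vector into the local work vector lvec, then
     apply the local dense block (each process stores full rows) */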
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIDense"
int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;
  Scalar       zero = 0.0;

  PetscFunctionBegin;
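  /* the local transpose-multiply produces this process's contribution to
     every entry of the result; the reverse scatter with ADD_VALUES sums
     the contributions from all processes into yy */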
  ierr = VecSet(&zero,yy); CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIDense"
int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz); CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
  int          ierr, len, i, n, m = a->m, radd;
  Scalar       *x, zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,v); CHKERRQ(ierr);
  ierr = VecGetArray(v,&x); CHKERRQ(ierr);
  ierr = VecGetSize(v,&n); CHKERRQ(ierr);
  if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
  len = PetscMin(aloc->m,aloc->n);
  radd = a->rstart*m;
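  /* the local block is stored column-major with leading dimension m, so
     global diagonal entry rstart+i sits at local offset rstart*m + i*m + i */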
  for ( i=0; i<len; i++ ) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
#if defined(USE_PETSC_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  PetscFree(mdn->rowners);
  ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
  if (mdn->lvec)   VecDestroy(mdn->lvec);
  if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   PetscFree(mdn->factor->temp);
    if (mdn->factor->tag)    PetscFree(mdn->factor->tag);
    if (mdn->factor->pivots) PetscFree(mdn->factor->pivots);
    PetscFree(mdn->factor);
  }
  PetscFree(mdn);
  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#include "pinclude/pviewer.h"

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_ASCII"
static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format;
  FILE         *fd;
  ViewerType   vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
  ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
  if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
    int rank;
    MatInfo info;
    MPI_Comm_rank(mat->comm,&rank);
    ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
      fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
         (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
    ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  else if (format == VIEWER_FORMAT_ASCII_INFO) {
    PetscFunctionReturn(0);
  }
  if (vtype == ASCII_FILE_VIEWER) {
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
             mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  } else {
    int size = mdn->size, rank = mdn->rank;
    if (size == 1) {
      ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    } else {
      /* assemble the entire matrix onto first processor. */
      Mat          A;
      int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
      Scalar       *vals;
      Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

      if (!rank) {
        ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
      } else {
        ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* Copy the matrix ... This isn't the most efficient means,
         but it's quick for now */
      row = mdn->rstart; m = Amdn->m;
      for ( i=0; i<m; i++ ) {
        ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
        ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        row++;
      }

      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,Viewer viewer)
{
  int          ierr;
  ViewerType   vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER) {
    ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
  } else if (vtype == ASCII_FILES_VIEWER) {
    ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
  } else if (vtype == BINARY_FILE_VIEWER) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  Mat          mdn = mat->A;
  int          ierr;
  double       isend[5], irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    /* isend/irecv hold doubles, so the reductions must use MPI_DOUBLE */
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
   extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
   extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
   extern int MatSolve_MPIDense(Mat,Vec,Vec);
   extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
   extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
   extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_NEW_NONZERO_LOCATION_ERROR ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_COLUMNS_UNSORTED) {
    MatSetOption(a->A,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS ||
             op == MAT_USE_HASH_TABLE) {
    PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0; MatSetOption(a->A,op);
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIDense"
int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->M; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIDense"
int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIDense"
int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->rstart; *n = mat->rend;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  int          lrow, rstart = mat->rstart, rend = mat->rend,ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  PetscFunctionBegin;
  if (idx) PetscFree(*idx);
  if (v) PetscFree(*v);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr, i, j;
  double       sum = 0.0;
  Scalar       *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mat->n*mat->m; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    } else if (type == NORM_1) {
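      /* the 1-norm is the maximum column sum: accumulate local column sums,
         add them across all processes, then take the maximum over columns */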
      double *tmp, *tmp2;
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      PetscMemzero(tmp,2*mdn->N*sizeof(double));
      *norm = 0.0;
      v = mat->v;
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp);
      PLogFlops(mat->n*mat->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
809 #undef __FUNC__
810 #define __FUNC__ "MatTranspose_MPIDense"
811 int MatTranspose_MPIDense(Mat A,Mat *matout)
812 {
813   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
814   Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
815   Mat          B;
816   int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
817   int          j, i, ierr;
818   Scalar       *v;
819 
820   PetscFunctionBegin;
821   if (matout == PETSC_NULL && M != N) {
822     SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place");
823   }
824   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);
825 
826   m = Aloc->m; n = Aloc->n; v = Aloc->v;
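  /* each local column j of A (contiguous in the column-major store) becomes
     row j of B; rwork holds the global row indices of A, which serve as the
     column indices for that row of B */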
  rwork = (int *) PetscMalloc((m+1)*sizeof(int)); CHKPTRQ(rwork);
  for ( i=0; i<m; i++ ) rwork[i] = rstart + i;
  for ( j=0; j<n; j++ ) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
    v   += m;
  }
  PetscFree(rwork);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps       *Abops;
    struct _MatOps *Aops;

    /* This isn't really an in-place transpose, but free data struct from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);

    /*
         This is horrible, horrible code. We need to keep the
      A pointers for the bops and ops but copy everything
      else from B.
    */
    Abops = A->bops;
    Aops  = A->ops;
    PetscMemcpy(A,B,sizeof(struct _p_Mat));
    A->bops = Abops;
    A->ops  = Aops;

    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#include "pinclude/plapack.h"
#undef __FUNC__
#define __FUNC__ "MatScale_MPIDense"
int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
  Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
  int          one = 1, nz;

  PetscFunctionBegin;
  nz = a->m*a->n;
  BLscal_( &nz, alpha, a->v, &one );
  PLogFlops(nz);
  PetscFunctionReturn(0);
}

static int MatConvertSameType_MPIDense(Mat,Mat *,int);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,MatRestoreRow_MPIDense,
       MatMult_MPIDense,MatMultAdd_MPIDense,
       MatMultTrans_MPIDense,MatMultTransAdd_MPIDense,
/*       MatSolve_MPIDense,0, */
       0,0,
       0,0,
       0,0,
/*       MatLUFactor_MPIDense,0, */
       0,MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,0,MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
       0,0,
/*       0,MatLUFactorSymbolic_MPIDense,MatLUFactorNumeric_MPIDense, */
       0,0,
       MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,0, MatGetArray_MPIDense, MatRestoreArray_MPIDense,
       MatConvertSameType_MPIDense,
       0,0,0,0,0,
       0,0,MatGetValues_MPIDense,0,0,MatScale_MPIDense,
       0,0,0,MatGetBlockSize_MPIDense};

#undef __FUNC__
#define __FUNC__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated
           if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated
           if n is given)
.  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Currently, the only parallel dense matrix decomposition is by rows,
   so that n=N and each submatrix owns all of the global columns.

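   Example usage (a minimal sketch; error checking is omitted and the
   sizes are illustrative only):

     Mat A;
     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
                              100,100,PETSC_NULL,&A);
     ... set entries with MatSetValues(), then ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
     ierr = MatDestroy(A);
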
.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i,flg;

  PetscFunctionBegin;
  /* Note:  For now, when data is specified above, this assumes the user correctly
   allocates the local dense storage space.  We should add error checking. */

  *A = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data       = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->ops->destroy    = MatDestroy_MPIDense;
  mat->ops->view       = MatView_MPIDense;
  mat->factor          = 0;
  mat->mapping         = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  if (M == PETSC_DECIDE) {ierr = MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);}
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}

  /* each process stores all columns */
  if (N == PETSC_DECIDE) N = n;
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  /*  if (n != N) SETERRQ(PETSC_ERR_SUP,0,"For now, only n=N is supported"); */
  a->N = mat->N = N;
  a->M = mat->M = M;
  a->m = mat->m = m;
  a->n = mat->n = n;

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
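  /* convert the per-process row counts gathered into rowners[1..size] into a
     prefix sum, so process p owns rows rowners[p] through rowners[p+1]-1 */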
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr      = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }

  ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off-processor entries */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = 1;

  *A = mat;
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatConvertSameType_MPIDense"
static int MatConvertSameType_MPIDense(Mat A,Mat *newmat,int cpvalues)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
  int          ierr;
  FactorCtx    *factor;

  PetscFunctionBegin;
  *newmat       = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,A->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data      = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->ops->destroy   = MatDestroy_MPIDense;
  mat->ops->view      = MatView_MPIDense;
  mat->factor         = A->factor;
  mat->assembled      = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;

  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);

  ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
{
  int        *rowners, i,size,rank,m,ierr,nz,j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  PetscFunctionBegin;
  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array); CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values  */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR); CHKERRQ(ierr);

    /* insert into the matrix by row (this is why we cannot read directly
       into the column-major array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors and ship out */
    for ( i=1; i<size; i++ ) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values*/
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix by row (this is why we cannot read directly
       into the column-major array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  PetscFree(rowners);
  PetscFree(vals);
  /* the array was obtained with MatGetArray() above; give it back */
  ierr = MatRestoreArray(*newmat,&array); CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense"
int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;
  int          i, nz, ierr, j,rstart, rend, fd;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}