xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 4787f7683dc5ea85e35b4269f7afceca0dae908f)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpidense.c,v 1.104 1999/02/15 20:16:39 bsmith Exp balay $";
3 #endif
4 
5 /*
6    Basic functions for parallel dense matrices.
7 */
8 
9 #include "src/mat/impls/dense/mpi/mpidense.h"
10 #include "src/vec/vecimpl.h"
11 
12 #undef __FUNC__
13 #define __FUNC__ "MatSetValues_MPIDense"
14 int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
15 {
16   Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
17   int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
18   int          roworiented = A->roworiented;
19 
20   PetscFunctionBegin;
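  /* entries for locally owned rows (rstart <= idxm[i] < rend) go straight into the
     sequential matrix; all others are stashed and communicated during assembly.
     With roworiented set, v is read row by row as v[i*n+j]; otherwise it is read
     column by column as v[i+j*m]. */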
21   for ( i=0; i<m; i++ ) {
22     if (idxm[i] < 0) continue;
23     if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
24     if (idxm[i] >= rstart && idxm[i] < rend) {
25       row = idxm[i] - rstart;
26       if (roworiented) {
27         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr);
28       } else {
29         for ( j=0; j<n; j++ ) {
30           if (idxn[j] < 0) continue;
31           if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
32           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
33         }
34       }
35     } else {
36       if (roworiented) {
37         ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr);
38       } else { /* must stash each separately */
39         row = idxm[i];
40         for ( j=0; j<n; j++ ) {
41           ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
42         }
43       }
44     }
45   }
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNC__
50 #define __FUNC__ "MatGetValues_MPIDense"
51 int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
52 {
53   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
54   int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;
55 
56   PetscFunctionBegin;
57   for ( i=0; i<m; i++ ) {
58     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
59     if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
60     if (idxm[i] >= rstart && idxm[i] < rend) {
61       row = idxm[i] - rstart;
62       for ( j=0; j<n; j++ ) {
63         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
64         if (idxn[j] >= mdn->N) {
65           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
66         }
67         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr);
68       }
69     } else {
70       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
71     }
72   }
73   PetscFunctionReturn(0);
74 }
75 
76 #undef __FUNC__
77 #define __FUNC__ "MatGetArray_MPIDense"
78 int MatGetArray_MPIDense(Mat A,Scalar **array)
79 {
80   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
81   int          ierr;
82 
83   PetscFunctionBegin;
84   ierr = MatGetArray(a->A,array); CHKERRQ(ierr);
85   PetscFunctionReturn(0);
86 }
87 
88 #undef __FUNC__
89 #define __FUNC__ "MatRestoreArray_MPIDense"
90 int MatRestoreArray_MPIDense(Mat A,Scalar **array)
91 {
92   PetscFunctionBegin;
93   PetscFunctionReturn(0);
94 }
95 
96 #undef __FUNC__
97 #define __FUNC__ "MatAssemblyBegin_MPIDense"
98 int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
99 {
100   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
101   MPI_Comm     comm = mat->comm;
102   int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
103   int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
104   int          tag = mat->tag, *owner,*starts,count,ierr;
105   InsertMode   addv;
106   MPI_Request  *send_waits,*recv_waits;
107   Scalar       *rvalues,*svalues;
108 
109   PetscFunctionBegin;
110   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
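  /* the MPI_BOR reduction equals ADD_VALUES|INSERT_VALUES exactly when some processes
     inserted while others added (assuming the two modes occupy distinct bits), which
     is the mixed case rejected below */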
111   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
112   if (addv == (ADD_VALUES|INSERT_VALUES)) {
113     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
114   }
115   mat->insertmode = addv; /* in case this processor had no cache */
116 
117   /*  first count number of contributors to each processor */
118   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
119   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
120   owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
121   for ( i=0; i<mdn->stash.n; i++ ) {
122     idx = mdn->stash.idx[i];
123     for ( j=0; j<size; j++ ) {
124       if (idx >= owners[j] && idx < owners[j+1]) {
125         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
126       }
127     }
128   }
129   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
130 
131   /* inform other processors of number of messages and max length*/
132   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
133   ierr = MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
134   nreceives = work[rank];
135   if (nreceives > size) SETERRQ(PETSC_ERR_PLIB,0,"Internal PETSc error");
136   ierr = MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
137   nmax = work[rank];
138   PetscFree(work);
139 
140   /* post receives:
141        1) each message consists of ordered triples
142      (global row index, global column index, value); the indices are stored
143      as Scalars to simplify the message passing.
144        2) since we don't know how long each individual message is, we
145      allocate the largest needed buffer for each receive. Potentially
146      this is a lot of wasted space.
147 
148        This could be done better.
149   */
150   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
151   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
152   for ( i=0; i<nreceives; i++ ) {
153     ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
154   }
155 
156   /* do sends:
157       1) starts[i] gives the starting index in svalues for stuff going to
158          the ith processor
159   */
160   svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
161   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
162   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
163   starts[0] = 0;
164   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
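  /* pack the stashed entries as (row, column, value) triples grouped by destination
     process; starts[owner] is advanced as each triple is written, so it is rebuilt
     below before the sends are posted */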
165   for ( i=0; i<mdn->stash.n; i++ ) {
166     svalues[3*starts[owner[i]]]       = (Scalar)  mdn->stash.idx[i];
167     svalues[3*starts[owner[i]]+1]     = (Scalar)  mdn->stash.idy[i];
168     svalues[3*(starts[owner[i]]++)+2] =  mdn->stash.array[i];
169   }
170   PetscFree(owner);
171   starts[0] = 0;
172   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
173   count = 0;
174   for ( i=0; i<size; i++ ) {
175     if (procs[i]) {
176       ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
177     }
178   }
179   PetscFree(starts); PetscFree(nprocs);
180 
181   /* Free cache space */
182   PLogInfo(mat,"MatAssemblyBegin_MPIDense:Number of off-processor values %d\n",mdn->stash.n);
183   ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);
184 
185   mdn->svalues    = svalues;    mdn->rvalues = rvalues;
186   mdn->nsends     = nsends;     mdn->nrecvs = nreceives;
187   mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
188   mdn->rmax       = nmax;
189 
190   PetscFunctionReturn(0);
191 }
192 extern int MatSetUpMultiply_MPIDense(Mat);
193 
194 #undef __FUNC__
195 #define __FUNC__ "MatAssemblyEnd_MPIDense"
196 int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
197 {
198   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
199   MPI_Status   *send_status,recv_status;
200   int          imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col;
201   Scalar       *values,val;
202   InsertMode   addv = mat->insertmode;
203 
204   PetscFunctionBegin;
205   /*  wait on receives */
206   while (count) {
207     ierr = MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
208     /* unpack receives into our local space */
209     values = mdn->rvalues + 3*imdex*mdn->rmax;
210     ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
211     n = n/3;
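    /* each message is a sequence of (global row, global column, value) triples stored
       as Scalars; convert the row to a local index and insert into the local matrix */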
212     for ( i=0; i<n; i++ ) {
213       row = (int) PetscReal(values[3*i]) - mdn->rstart;
214       col = (int) PetscReal(values[3*i+1]);
215       val = values[3*i+2];
216       if (col >= 0 && col < mdn->N) {
217         MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
218       }
219       else {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid column");}
220     }
221     count--;
222   }
223   PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues);
224 
225   /* wait on sends */
226   if (mdn->nsends) {
227     send_status = (MPI_Status *) PetscMalloc(mdn->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
228     ierr        = MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);CHKERRQ(ierr);
229     PetscFree(send_status);
230   }
231   PetscFree(mdn->send_waits); PetscFree(mdn->svalues);
232 
233   ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
234   ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);
235 
236   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
237     ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
238   }
239   PetscFunctionReturn(0);
240 }
241 
242 #undef __FUNC__
243 #define __FUNC__ "MatZeroEntries_MPIDense"
244 int MatZeroEntries_MPIDense(Mat A)
245 {
246   int          ierr;
247   Mat_MPIDense *l = (Mat_MPIDense *) A->data;
248 
249   PetscFunctionBegin;
250   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
251   PetscFunctionReturn(0);
252 }
253 
254 #undef __FUNC__
255 #define __FUNC__ "MatGetBlockSize_MPIDense"
256 int MatGetBlockSize_MPIDense(Mat A,int *bs)
257 {
258   PetscFunctionBegin;
259   *bs = 1;
260   PetscFunctionReturn(0);
261 }
262 
263 /* the code does not do the diagonal entries correctly unless the
264    matrix is square and the column and row ownerships are identical.
265    This is a BUG. The only way to fix it seems to be to access
266    the local matrix (mdn->A) directly and not through the MatZeroRows()
267    routine.
268 */
269 #undef __FUNC__
270 #define __FUNC__ "MatZeroRows_MPIDense"
271 int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
272 {
273   Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
274   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
275   int            *procs,*nprocs,j,found,idx,nsends,*work;
276   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
277   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
278   int            *lens,imdex,*lrows,*values;
279   MPI_Comm       comm = A->comm;
280   MPI_Request    *send_waits,*recv_waits;
281   MPI_Status     recv_status,*send_status;
282   IS             istmp;
283 
284   PetscFunctionBegin;
285   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
286   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
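  /*
     The rows listed in "is" may be owned by any process, so they are first routed to
     their owners: count how many go to each process, exchange message counts and
     lengths with two MPI_Allreduce calls, ship the indices with MPI_Isend/MPI_Irecv,
     and finally zero the locally owned rows through MatZeroRows() on l->A.
  */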
287 
288   /*  first count number of contributors to each processor */
289   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
290   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
291   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
292   for ( i=0; i<N; i++ ) {
293     idx = rows[i];
294     found = 0;
295     for ( j=0; j<size; j++ ) {
296       if (idx >= owners[j] && idx < owners[j+1]) {
297         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
298       }
299     }
300     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
301   }
302   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
303 
304   /* inform other processors of number of messages and max length*/
305   work   = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
306   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
307   nrecvs = work[rank];
308   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
309   nmax   = work[rank];
310   PetscFree(work);
311 
312   /* post receives:   */
313   rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
314   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
315   for ( i=0; i<nrecvs; i++ ) {
316     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
317   }
318 
319   /* do sends:
320       1) starts[i] gives the starting index in svalues for stuff going to
321          the ith processor
322   */
323   svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
324   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
325   starts     = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
326   starts[0]  = 0;
327   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
328   for ( i=0; i<N; i++ ) {
329     svalues[starts[owner[i]]++] = rows[i];
330   }
331   ISRestoreIndices(is,&rows);
332 
333   starts[0] = 0;
334   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
335   count = 0;
336   for ( i=0; i<size; i++ ) {
337     if (procs[i]) {
338       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
339     }
340   }
341   PetscFree(starts);
342 
343   base = owners[rank];
344 
345   /*  wait on receives */
346   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
347   source = lens + nrecvs;
348   count  = nrecvs; slen = 0;
349   while (count) {
350     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
351     /* unpack receives into our local space */
352     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
353     source[imdex]  = recv_status.MPI_SOURCE;
354     lens[imdex]  = n;
355     slen += n;
356     count--;
357   }
358   PetscFree(recv_waits);
359 
360   /* gather the received row indices into a single local list */
361   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
362   count = 0;
363   for ( i=0; i<nrecvs; i++ ) {
364     values = rvalues + i*nmax;
365     for ( j=0; j<lens[i]; j++ ) {
366       lrows[count++] = values[j] - base;
367     }
368   }
369   PetscFree(rvalues); PetscFree(lens);
370   PetscFree(owner); PetscFree(nprocs);
371 
372   /* actually zap the local rows */
373   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
374   PLogObjectParent(A,istmp);
375   PetscFree(lrows);
376   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
377   ierr = ISDestroy(istmp); CHKERRQ(ierr);
378 
379   /* wait on sends */
380   if (nsends) {
381     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
382     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
383     PetscFree(send_status);
384   }
385   PetscFree(send_waits); PetscFree(svalues);
386 
387   PetscFunctionReturn(0);
388 }
389 
390 #undef __FUNC__
391 #define __FUNC__ "MatMult_MPIDense"
392 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
393 {
394   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
395   int          ierr;
396 
397   PetscFunctionBegin;
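  /* gather all entries of xx into the local work vector lvec (each process stores all
     N columns of its rows, so it needs the entire input vector), then apply the
     sequential dense multiply */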
398   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
399   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
400   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
401   PetscFunctionReturn(0);
402 }
403 
404 #undef __FUNC__
405 #define __FUNC__ "MatMultAdd_MPIDense"
406 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
407 {
408   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
409   int          ierr;
410 
411   PetscFunctionBegin;
412   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
413   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
414   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
415   PetscFunctionReturn(0);
416 }
417 
418 #undef __FUNC__
419 #define __FUNC__ "MatMultTrans_MPIDense"
420 int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
421 {
422   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
423   int          ierr;
424   Scalar       zero = 0.0;
425 
426   PetscFunctionBegin;
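  /* yy = A^T xx: zero yy, form the local product (A_local)^T xx into lvec, then
     reverse-scatter with ADD_VALUES so the contributions of all local blocks are
     accumulated into yy */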
427   ierr = VecSet(&zero,yy); CHKERRQ(ierr);
428   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
429   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
430   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
431   PetscFunctionReturn(0);
432 }
433 
434 #undef __FUNC__
435 #define __FUNC__ "MatMultTransAdd_MPIDense"
436 int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
437 {
438   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
439   int          ierr;
440 
441   PetscFunctionBegin;
442   ierr = VecCopy(yy,zz); CHKERRQ(ierr);
443   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
444   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
445   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
446   PetscFunctionReturn(0);
447 }
448 
449 #undef __FUNC__
450 #define __FUNC__ "MatGetDiagonal_MPIDense"
451 int MatGetDiagonal_MPIDense(Mat A,Vec v)
452 {
453   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
454   Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
455   int          ierr, len, i, n, m = a->m, radd;
456   Scalar       *x, zero = 0.0;
457 
458   PetscFunctionBegin;
459   VecSet(&zero,v);
460   ierr = VecGetArray(v,&x); CHKERRQ(ierr);
461   ierr = VecGetSize(v,&n); CHKERRQ(ierr);
462   if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
463   len = PetscMin(aloc->m,aloc->n);
464   radd = a->rstart*m;
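  /* the local array is addressed column by column with leading dimension m; local row i
     holds its global diagonal entry in column rstart+i, i.e. at offset radd + i*m + i */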
465   for ( i=0; i<len; i++ ) {
466     x[i] = aloc->v[radd + i*m + i];
467   }
468   PetscFunctionReturn(0);
469 }
470 
471 #undef __FUNC__
472 #define __FUNC__ "MatDestroy_MPIDense"
473 int MatDestroy_MPIDense(Mat mat)
474 {
475   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
476   int          ierr;
477 
478   PetscFunctionBegin;
479   if (--mat->refct > 0) PetscFunctionReturn(0);
480 
481   if (mat->mapping) {
482     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
483   }
484   if (mat->bmapping) {
485     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
486   }
487 #if defined(USE_PETSC_LOG)
488   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
489 #endif
490   PetscFree(mdn->rowners);
491   ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
492   if (mdn->lvec)   VecDestroy(mdn->lvec);
493   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
494   if (mdn->factor) {
495     if (mdn->factor->temp)   PetscFree(mdn->factor->temp);
496     if (mdn->factor->tag)    PetscFree(mdn->factor->tag);
497     if (mdn->factor->pivots) PetscFree(mdn->factor->pivots);
498     PetscFree(mdn->factor);
499   }
500   PetscFree(mdn);
501   if (mat->rmap) {
502     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
503   }
504   if (mat->cmap) {
505     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
506   }
507   PLogObjectDestroy(mat);
508   PetscHeaderDestroy(mat);
509   PetscFunctionReturn(0);
510 }
511 
512 #undef __FUNC__
513 #define __FUNC__ "MatView_MPIDense_Binary"
514 static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
515 {
516   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
517   int          ierr;
518 
519   PetscFunctionBegin;
520   if (mdn->size == 1) {
521     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
522   }
523   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
524   PetscFunctionReturn(0);
525 }
526 
527 #undef __FUNC__
528 #define __FUNC__ "MatView_MPIDense_ASCII"
529 static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
530 {
531   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
532   int          ierr, format, size = mdn->size, rank = mdn->rank;
533   FILE         *fd;
534   ViewerType   vtype;
535 
536   PetscFunctionBegin;
537   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
538   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
539   ierr = ViewerGetFormat(viewer,&format); CHKERRQ(ierr);
540   if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
541     MatInfo info;
542     ierr = MatGetInfo(mat,MAT_LOCAL,&info); CHKERRQ(ierr);
543     PetscSequentialPhaseBegin(mat->comm,1);
544       fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
545          (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
546       fflush(fd);
547     PetscSequentialPhaseEnd(mat->comm,1);
548     ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
549     PetscFunctionReturn(0);
550   } else if (format == VIEWER_FORMAT_ASCII_INFO) {
551     PetscFunctionReturn(0);
552   }
553 
554   if (size == 1) {
555     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
556   } else {
557     /* assemble the entire matrix onto first processor. */
558     Mat          A;
559     int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
560     Scalar       *vals;
561     Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;
562 
563     if (!rank) {
564       ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
565     } else {
566       ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
567     }
568     PLogObjectParent(mat,A);
569 
570     /* Copy the matrix ... This isn't the most efficient means,
571        but it's quick for now */
572     row = mdn->rstart; m = Amdn->m;
573     for ( i=0; i<m; i++ ) {
574       ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
575       ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
576       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
577       row++;
578     }
579 
580     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
581     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
582     if (!rank) {
583       ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
584     }
585     ierr = MatDestroy(A); CHKERRQ(ierr);
586   }
587   PetscFunctionReturn(0);
588 }
589 
590 #undef __FUNC__
591 #define __FUNC__ "MatView_MPIDense"
592 int MatView_MPIDense(Mat mat,Viewer viewer)
593 {
594   int          ierr;
595   ViewerType   vtype;
596   PetscFunctionBegin;
597   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
598   if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
599     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
600   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
601     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
602   } else {
603     SETERRQ(1,1,"Viewer type not supported by PETSc object");
604   }
605   PetscFunctionReturn(0);
606 }
607 
608 #undef __FUNC__
609 #define __FUNC__ "MatGetInfo_MPIDense"
610 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
611 {
612   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
613   Mat          mdn = mat->A;
614   int          ierr;
615   double       isend[5], irecv[5];
616 
617   PetscFunctionBegin;
618   info->rows_global    = (double)mat->M;
619   info->columns_global = (double)mat->N;
620   info->rows_local     = (double)mat->m;
621   info->columns_local  = (double)mat->N;
622   info->block_size     = 1.0;
623   ierr = MatGetInfo(mdn,MAT_LOCAL,info); CHKERRQ(ierr);
624   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
625   isend[3] = info->memory;  isend[4] = info->mallocs;
626   if (flag == MAT_LOCAL) {
627     info->nz_used      = isend[0];
628     info->nz_allocated = isend[1];
629     info->nz_unneeded  = isend[2];
630     info->memory       = isend[3];
631     info->mallocs      = isend[4];
632   } else if (flag == MAT_GLOBAL_MAX) {
633     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
634     info->nz_used      = irecv[0];
635     info->nz_allocated = irecv[1];
636     info->nz_unneeded  = irecv[2];
637     info->memory       = irecv[3];
638     info->mallocs      = irecv[4];
639   } else if (flag == MAT_GLOBAL_SUM) {
640     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
641     info->nz_used      = irecv[0];
642     info->nz_allocated = irecv[1];
643     info->nz_unneeded  = irecv[2];
644     info->memory       = irecv[3];
645     info->mallocs      = irecv[4];
646   }
647   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
648   info->fill_ratio_needed = 0;
649   info->factor_mallocs    = 0;
650   PetscFunctionReturn(0);
651 }
652 
653 /* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
654    extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
655    extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
656    extern int MatSolve_MPIDense(Mat,Vec,Vec);
657    extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
658    extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
659    extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */
660 
661 #undef __FUNC__
662 #define __FUNC__ "MatSetOption_MPIDense"
663 int MatSetOption_MPIDense(Mat A,MatOption op)
664 {
665   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
666 
667   PetscFunctionBegin;
668   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
669       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
670       op == MAT_NEW_NONZERO_LOCATION_ERR ||
671       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
672       op == MAT_COLUMNS_SORTED ||
673       op == MAT_COLUMNS_UNSORTED) {
674         MatSetOption(a->A,op);
675   } else if (op == MAT_ROW_ORIENTED) {
676         a->roworiented = 1;
677         MatSetOption(a->A,op);
678   } else if (op == MAT_ROWS_SORTED ||
679              op == MAT_ROWS_UNSORTED ||
680              op == MAT_SYMMETRIC ||
681              op == MAT_STRUCTURALLY_SYMMETRIC ||
682              op == MAT_YES_NEW_DIAGONALS ||
683              op == MAT_USE_HASH_TABLE) {
684     PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
685   } else if (op == MAT_COLUMN_ORIENTED) {
686     a->roworiented = 0; MatSetOption(a->A,op);
687   } else if (op == MAT_NO_NEW_DIAGONALS) {
688     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
689   } else {
690     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
691   }
692   PetscFunctionReturn(0);
693 }
694 
695 #undef __FUNC__
696 #define __FUNC__ "MatGetSize_MPIDense"
697 int MatGetSize_MPIDense(Mat A,int *m,int *n)
698 {
699   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
700 
701   PetscFunctionBegin;
702   *m = mat->M; *n = mat->N;
703   PetscFunctionReturn(0);
704 }
705 
706 #undef __FUNC__
707 #define __FUNC__ "MatGetLocalSize_MPIDense"
708 int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
709 {
710   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
711 
712   PetscFunctionBegin;
713   *m = mat->m; *n = mat->N;
714   PetscFunctionReturn(0);
715 }
716 
717 #undef __FUNC__
718 #define __FUNC__ "MatGetOwnershipRange_MPIDense"
719 int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
720 {
721   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
722 
723   PetscFunctionBegin;
724   *m = mat->rstart; *n = mat->rend;
725   PetscFunctionReturn(0);
726 }
727 
728 #undef __FUNC__
729 #define __FUNC__ "MatGetRow_MPIDense"
730 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
731 {
732   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
733   int          lrow, rstart = mat->rstart, rend = mat->rend,ierr;
734 
735   PetscFunctionBegin;
736   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows")
737   lrow = row - rstart;
738   ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
739   PetscFunctionReturn(0);
740 }
741 
742 #undef __FUNC__
743 #define __FUNC__ "MatRestoreRow_MPIDense"
744 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
745 {
746   PetscFunctionBegin;
747   if (idx) PetscFree(*idx);
748   if (v) PetscFree(*v);
749   PetscFunctionReturn(0);
750 }
751 
752 #undef __FUNC__
753 #define __FUNC__ "MatNorm_MPIDense"
754 int MatNorm_MPIDense(Mat A,NormType type,double *norm)
755 {
756   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
757   Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
758   int          ierr, i, j;
759   double       sum = 0.0;
760   Scalar       *v = mat->v;
761 
762   PetscFunctionBegin;
763   if (mdn->size == 1) {
764     ierr =  MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
765   } else {
766     if (type == NORM_FROBENIUS) {
767       for (i=0; i<mat->n*mat->m; i++ ) {
768 #if defined(USE_PETSC_COMPLEX)
769         sum += PetscReal(PetscConj(*v)*(*v)); v++;
770 #else
771         sum += (*v)*(*v); v++;
772 #endif
773       }
774       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
775       *norm = sqrt(*norm);
776       PLogFlops(2*mat->n*mat->m);
777     } else if (type == NORM_1) {
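      /* the 1-norm is the largest column sum: accumulate the per-column sums of absolute
         values locally, add them across processes, then take the maximum over columns */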
778       double *tmp, *tmp2;
779       tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
780       tmp2 = tmp + mdn->N;
781       PetscMemzero(tmp,2*mdn->N*sizeof(double));
782       *norm = 0.0;
783       v = mat->v;
784       for ( j=0; j<mat->n; j++ ) {
785         for ( i=0; i<mat->m; i++ ) {
786           tmp[j] += PetscAbsScalar(*v);  v++;
787         }
788       }
789       ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
790       for ( j=0; j<mdn->N; j++ ) {
791         if (tmp2[j] > *norm) *norm = tmp2[j];
792       }
793       PetscFree(tmp);
794       PLogFlops(mat->n*mat->m);
795     } else if (type == NORM_INFINITY) { /* max row norm */
796       double ntemp;
797       ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
798       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
799     } else {
800       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
801     }
802   }
803   PetscFunctionReturn(0);
804 }
805 
806 #undef __FUNC__
807 #define __FUNC__ "MatTranspose_MPIDense"
808 int MatTranspose_MPIDense(Mat A,Mat *matout)
809 {
810   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
811   Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
812   Mat          B;
813   int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
814   int          j, i, ierr;
815   Scalar       *v;
816 
817   PetscFunctionBegin;
818   if (matout == PETSC_NULL && M != N) {
819     SETERRQ(PETSC_ERR_SUP,0,"Supports in-place transpose only for square matrices");
820   }
821   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);
822 
823   m = Aloc->m; n = Aloc->n; v = Aloc->v;
824   rwork = (int *) PetscMalloc(m*sizeof(int)); CHKPTRQ(rwork);
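  /* column j of the local block (global rows rstart..rstart+m-1 of A) is inserted as
     the corresponding entries of global row j of B */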
825   for ( j=0; j<n; j++ ) {
826     for (i=0; i<m; i++) rwork[i] = rstart + i;
827     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
828     v   += m;
829   }
830   PetscFree(rwork);
831   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
832   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
833   if (matout != PETSC_NULL) {
834     *matout = B;
835   } else {
836     PetscOps *Abops;
837     MatOps   Aops;
838 
839     /* This isn't really an in-place transpose; instead, free a's data structures and overwrite A's header with B's below */
840     PetscFree(a->rowners);
841     ierr = MatDestroy(a->A); CHKERRQ(ierr);
842     if (a->lvec) VecDestroy(a->lvec);
843     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
844     PetscFree(a);
845 
846     /*
847          This is horrible, horrible code. We need to keep the
848       A pointers for the bops and ops but copy everything
849       else from B.
850     */
851     Abops = A->bops;
852     Aops  = A->ops;
853     PetscMemcpy(A,B,sizeof(struct _p_Mat));
854     A->bops = Abops;
855     A->ops  = Aops;
856 
857     PetscHeaderDestroy(B);
858   }
859   PetscFunctionReturn(0);
860 }
861 
862 #include "pinclude/blaslapack.h"
863 #undef __FUNC__
864 #define __FUNC__ "MatScale_MPIDense"
865 int MatScale_MPIDense(Scalar *alpha,Mat inA)
866 {
867   Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
868   Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
869   int          one = 1, nz;
870 
871   PetscFunctionBegin;
872   nz = a->m*a->n;
873   BLscal_( &nz, alpha, a->v, &one );
874   PLogFlops(nz);
875   PetscFunctionReturn(0);
876 }
877 
878 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
879 extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);
880 
881 /* -------------------------------------------------------------------*/
882 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
883        MatGetRow_MPIDense,
884        MatRestoreRow_MPIDense,
885        MatMult_MPIDense,
886        MatMultAdd_MPIDense,
887        MatMultTrans_MPIDense,
888        MatMultTransAdd_MPIDense,
889        0,
890        0,
891        0,
892        0,
893        0,
894        0,
895        0,
896        MatTranspose_MPIDense,
897        MatGetInfo_MPIDense,0,
898        MatGetDiagonal_MPIDense,
899        0,
900        MatNorm_MPIDense,
901        MatAssemblyBegin_MPIDense,
902        MatAssemblyEnd_MPIDense,
903        0,
904        MatSetOption_MPIDense,
905        MatZeroEntries_MPIDense,
906        MatZeroRows_MPIDense,
907        0,
908        0,
909        0,
910        0,
911        MatGetSize_MPIDense,
912        MatGetLocalSize_MPIDense,
913        MatGetOwnershipRange_MPIDense,
914        0,
915        0,
916        MatGetArray_MPIDense,
917        MatRestoreArray_MPIDense,
918        MatDuplicate_MPIDense,
919        0,
920        0,
921        0,
922        0,
923        0,
924        MatGetSubMatrices_MPIDense,
925        0,
926        MatGetValues_MPIDense,
927        0,
928        0,
929        MatScale_MPIDense,
930        0,
931        0,
932        0,
933        MatGetBlockSize_MPIDense,
934        0,
935        0,
936        0,
937        0,
938        0,
939        0,
940        0,
941        0,
942        0,
943        0,
944        0,
945        0,
946        MatGetMaps_Petsc};
947 
948 #undef __FUNC__
949 #define __FUNC__ "MatCreateMPIDense"
950 /*@C
950    MatCreateMPIDense - Creates a parallel matrix in dense format.
952 
953    Collective on MPI_Comm
954 
955    Input Parameters:
956 +  comm - MPI communicator
957 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
958 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
959 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
960 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
961 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
962    to control all matrix memory allocation.
963 
964    Output Parameter:
965 .  A - the matrix
966 
967    Notes:
968    The dense format is fully compatible with standard Fortran 77
969    storage by columns.
970 
971    The data input variable is intended primarily for Fortran programmers
972    who wish to allocate their own matrix memory space.  Most users should
973    set data=PETSC_NULL.
974 
975    The user MUST specify either the local or global matrix dimensions
976    (possibly both).
977 
978    Currently, the only parallel dense matrix decomposition is by rows,
979    so that n=N and each submatrix owns all of the global columns.
980 
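   Example usage (a minimal sketch; M and N are the global dimensions described above,
   the row, column, and value variables are illustrative, and error checking is omitted):
$      Mat    A;
$      Scalar value = 1.0;
$      int    row = 0, col = 0, ierr;    /* illustrative indices */
$      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);
$      ierr = MatSetValues(A,1,&row,1,&col,&value,INSERT_VALUES);
$      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
$      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
$      ierr = MatDestroy(A);
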
981    Level: intermediate
982 
983 .keywords: matrix, dense, parallel
984 
985 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
986 @*/
987 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
988 {
989   Mat          mat;
990   Mat_MPIDense *a;
991   int          ierr, i,flg;
992 
993   PetscFunctionBegin;
994   /* Note:  For now, when data is specified above, this assumes the user correctly
995    allocates the local dense storage space.  We should add error checking. */
996 
997   *A = 0;
998   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView);
999   PLogObjectCreate(mat);
1000   mat->data       = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
1001   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1002   mat->ops->destroy    = MatDestroy_MPIDense;
1003   mat->ops->view       = MatView_MPIDense;
1004   mat->factor          = 0;
1005   mat->mapping         = 0;
1006 
1007   a->factor       = 0;
1008   mat->insertmode = NOT_SET_VALUES;
1009   MPI_Comm_rank(comm,&a->rank);
1010   MPI_Comm_size(comm,&a->size);
1011 
1012   ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
1013 
1014   /*
1015      The computation of n below is wrong; n should represent the number of local
1016      rows in the right-hand (column) vector
1017   */
1018 
1019   /* each process stores all columns of its local rows */
1020   if (N == PETSC_DECIDE) N = n;
1021   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
1022   /*  if (n != N) SETERRQ(PETSC_ERR_SUP,0,"For now, only n=N is supported"); */
1023   a->N = mat->N = N;
1024   a->M = mat->M = M;
1025   a->m = mat->m = m;
1026   a->n = mat->n = n;
1027 
1028   /* the information in the maps duplicates the information computed below, eventually
1029      we should remove the duplicate information that is not contained in the maps */
1030   ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr);
1031   ierr = MapCreateMPI(comm,PETSC_DECIDE,N,&mat->cmap);CHKERRQ(ierr);
1032 
1033   /* build local table of row and column ownerships */
1034   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1035   a->cowners = a->rowners + a->size + 1;
1036   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1037   ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1038   a->rowners[0] = 0;
1039   for ( i=2; i<=a->size; i++ ) {
1040     a->rowners[i] += a->rowners[i-1];
1041   }
1042   a->rstart = a->rowners[a->rank];
1043   a->rend   = a->rowners[a->rank+1];
1044   ierr      = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1045   a->cowners[0] = 0;
1046   for ( i=2; i<=a->size; i++ ) {
1047     a->cowners[i] += a->cowners[i-1];
1048   }
1049 
1050   ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr);
1051   PLogObjectParent(mat,a->A);
1052 
1053   /* build cache for off array entries formed */
1054   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
1055 
1056   /* stuff used for matrix vector multiply */
1057   a->lvec        = 0;
1058   a->Mvctx       = 0;
1059   a->roworiented = 1;
1060 
1061   *A = mat;
1062   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1063   if (flg) {
1064     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1065   }
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 #undef __FUNC__
1070 #define __FUNC__ "MatDuplicate_MPIDense"
1071 static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1072 {
1073   Mat          mat;
1074   Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
1075   int          ierr;
1076   FactorCtx    *factor;
1077 
1078   PetscFunctionBegin;
1079   *newmat       = 0;
1080   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView);
1081   PLogObjectCreate(mat);
1082   mat->data      = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
1083   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1084   mat->ops->destroy   = MatDestroy_MPIDense;
1085   mat->ops->view      = MatView_MPIDense;
1086   mat->factor         = A->factor;
1087   mat->assembled      = PETSC_TRUE;
1088 
1089   a->m = mat->m = oldmat->m;
1090   a->n = mat->n = oldmat->n;
1091   a->M = mat->M = oldmat->M;
1092   a->N = mat->N = oldmat->N;
1093   if (oldmat->factor) {
1094     a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor);
1095     /* copy factor contents ... add this code! */
1096   } else a->factor = 0;
1097 
1098   a->rstart       = oldmat->rstart;
1099   a->rend         = oldmat->rend;
1100   a->size         = oldmat->size;
1101   a->rank         = oldmat->rank;
1102   mat->insertmode = NOT_SET_VALUES;
1103 
1104   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
1105   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1106   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1107   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1108 
1109   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1110   PLogObjectParent(mat,a->lvec);
1111   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1112   PLogObjectParent(mat,a->Mvctx);
1113   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr);
1114   PLogObjectParent(mat,a->A);
1115   *newmat = mat;
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 #include "sys.h"
1120 
1121 #undef __FUNC__
1122 #define __FUNC__ "MatLoad_MPIDense_DenseInFile"
1123 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
1124 {
1125   int        *rowners, i,size,rank,m,ierr,nz,j;
1126   Scalar     *array,*vals,*vals_ptr;
1127   MPI_Status status;
1128 
1129   PetscFunctionBegin;
1130   MPI_Comm_rank(comm,&rank);
1131   MPI_Comm_size(comm,&size);
1132 
1133   /* determine ownership of all rows */
1134   m          = M/size + ((M % size) > rank);
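  /* every process gets M/size rows, and the first M%size processes get one extra row */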
1135   rowners    = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1136   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1137   rowners[0] = 0;
1138   for ( i=2; i<=size; i++ ) {
1139     rowners[i] += rowners[i-1];
1140   }
1141 
1142   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1143   ierr = MatGetArray(*newmat,&array); CHKERRQ(ierr);
1144 
1145   if (!rank) {
1146     vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);
1147 
1148     /* read in my part of the matrix numerical values  */
1149     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR); CHKERRQ(ierr);
1150 
1151     /* insert into the matrix row by row (this is why we cannot read directly into the column-major array) */
1152     vals_ptr = vals;
1153     for ( i=0; i<m; i++ ) {
1154       for ( j=0; j<N; j++ ) {
1155         array[i + j*m] = *vals_ptr++;
1156       }
1157     }
1158 
1159     /* read in the other processors' parts and ship them out */
1160     for ( i=1; i<size; i++ ) {
1161       nz   = (rowners[i+1] - rowners[i])*N;
1162       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1163       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1164     }
1165   } else {
1166     /* receive numeric values */
1167     vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);
1168 
1169     /* receive message of values*/
1170     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1171 
1172     /* insert into the matrix row by row (this is why we cannot read directly into the column-major array) */
1173     vals_ptr = vals;
1174     for ( i=0; i<m; i++ ) {
1175       for ( j=0; j<N; j++ ) {
1176         array[i + j*m] = *vals_ptr++;
1177       }
1178     }
1179   }
1180   PetscFree(rowners);
1181   PetscFree(vals);
1182   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1183   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 
1188 #undef __FUNC__
1189 #define __FUNC__ "MatLoad_MPIDense"
1190 int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
1191 {
1192   Mat          A;
1193   Scalar       *vals,*svals;
1194   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1195   MPI_Status   status;
1196   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1197   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1198   int          tag = ((PetscObject)viewer)->tag;
1199   int          i, nz, ierr, j,rstart, rend, fd;
1200 
1201   PetscFunctionBegin;
1202   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1203   if (!rank) {
1204     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1205     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
1206     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1207   }
1208 
1209   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1210   M = header[1]; N = header[2]; nz = header[3];
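  /* header[0] is the class cookie (checked above), header[1] the global row count M,
     header[2] the global column count N, and header[3] the number of nonzeros, or
     MATRIX_BINARY_FORMAT_DENSE when the file stores the matrix as a dense array */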
1211 
1212   /*
1213        Handle case where matrix is stored on disk as a dense matrix
1214   */
1215   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1216     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1217     PetscFunctionReturn(0);
1218   }
1219 
1220   /* determine ownership of all rows */
1221   m          = M/size + ((M % size) > rank);
1222   rowners    = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1223   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1224   rowners[0] = 0;
1225   for ( i=2; i<=size; i++ ) {
1226     rowners[i] += rowners[i-1];
1227   }
1228   rstart = rowners[rank];
1229   rend   = rowners[rank+1];
1230 
1231   /* distribute row lengths to all processors */
1232   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1233   offlens = ourlens + (rend-rstart);
1234   if (!rank) {
1235     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1236     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
1237     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1238     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1239     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1240     PetscFree(sndcounts);
1241   } else {
1242     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
1243   }
1244 
1245   if (!rank) {
1246     /* calculate the number of nonzeros on each processor */
1247     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1248     PetscMemzero(procsnz,size*sizeof(int));
1249     for ( i=0; i<size; i++ ) {
1250       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1251         procsnz[i] += rowlengths[j];
1252       }
1253     }
1254     PetscFree(rowlengths);
1255 
1256     /* determine max buffer needed and allocate it */
1257     maxnz = 0;
1258     for ( i=0; i<size; i++ ) {
1259       maxnz = PetscMax(maxnz,procsnz[i]);
1260     }
1261     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1262 
1263     /* read in my part of the matrix column indices  */
1264     nz = procsnz[0];
1265     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1266     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
1267 
1268     /* read in everyone else's part and ship it off */
1269     for ( i=1; i<size; i++ ) {
1270       nz   = procsnz[i];
1271       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
1272       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1273     }
1274     PetscFree(cols);
1275   } else {
1276     /* determine buffer space needed for message */
1277     nz = 0;
1278     for ( i=0; i<m; i++ ) {
1279       nz += ourlens[i];
1280     }
1281     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1282 
1283     /* receive message of column indices*/
1284     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1285     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1286     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1287   }
1288 
1289   /* loop over local rows, determining number of off diagonal entries */
1290   PetscMemzero(offlens,m*sizeof(int));
1291   jj = 0;
1292   for ( i=0; i<m; i++ ) {
1293     for ( j=0; j<ourlens[i]; j++ ) {
1294       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1295       jj++;
1296     }
1297   }
1298 
1299   /* create our matrix */
1300   for ( i=0; i<m; i++ ) {
1301     ourlens[i] -= offlens[i];
1302   }
1303   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1304   A = *newmat;
1305   for ( i=0; i<m; i++ ) {
1306     ourlens[i] += offlens[i];
1307   }
1308 
1309   if (!rank) {
1310     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1311 
1312     /* read in my part of the matrix numerical values  */
1313     nz = procsnz[0];
1314     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1315 
1316     /* insert into matrix */
1317     jj      = rstart;
1318     smycols = mycols;
1319     svals   = vals;
1320     for ( i=0; i<m; i++ ) {
1321       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1322       smycols += ourlens[i];
1323       svals   += ourlens[i];
1324       jj++;
1325     }
1326 
1327     /* read in the other processors' parts and ship them out */
1328     for ( i=1; i<size; i++ ) {
1329       nz   = procsnz[i];
1330       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1331       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1332     }
1333     PetscFree(procsnz);
1334   } else {
1335     /* receive numeric values */
1336     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1337 
1338     /* receive message of values*/
1339     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1340     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1341     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1342 
1343     /* insert into matrix */
1344     jj      = rstart;
1345     smycols = mycols;
1346     svals   = vals;
1347     for ( i=0; i<m; i++ ) {
1348       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1349       smycols += ourlens[i];
1350       svals   += ourlens[i];
1351       jj++;
1352     }
1353   }
1354   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1355 
1356   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1357   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1358   PetscFunctionReturn(0);
1359 }
1360 
1361 
1362 
1363 
1364 
1365