xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 5ef9f2a5cf905ed65136deff0c9e7fca368161b7)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpidense.c,v 1.101 1999/01/12 23:15:06 bsmith Exp bsmith $";
3 #endif
4 
5 /*
6    Basic functions for parallel dense matrices.
7 */
8 
9 #include "src/mat/impls/dense/mpi/mpidense.h"
10 #include "src/vec/vecimpl.h"
11 
12 #undef __FUNC__
13 #define __FUNC__ "MatSetValues_MPIDense"
14 int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
15 {
16   Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
17   int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
18   int          roworiented = A->roworiented;
19 
20   PetscFunctionBegin;
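      /*
         Note on the layout of v (a brief clarification, inferred from the calls below):
         if roworiented, v is logically m-by-n stored by rows, so the values for row i
         start at v+i*n; otherwise v is stored by columns and the entry for
         (row i, column j) is v[i+j*m].
      */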
21   for ( i=0; i<m; i++ ) {
22     if (idxm[i] < 0) continue;
23     if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
24     if (idxm[i] >= rstart && idxm[i] < rend) {
25       row = idxm[i] - rstart;
26       if (roworiented) {
27         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr);
28       } else {
29         for ( j=0; j<n; j++ ) {
30           if (idxn[j] < 0) continue;
31           if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
32           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
33         }
34       }
35     } else {
36       if (roworiented) {
37         ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr);
38       } else { /* must stash each entry separately */
39         row = idxm[i];
40         for ( j=0; j<n; j++ ) {
41           ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
42         }
43       }
44     }
45   }
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNC__
50 #define __FUNC__ "MatGetValues_MPIDense"
51 int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
52 {
53   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
54   int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;
55 
56   PetscFunctionBegin;
57   for ( i=0; i<m; i++ ) {
58     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
59     if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
60     if (idxm[i] >= rstart && idxm[i] < rend) {
61       row = idxm[i] - rstart;
62       for ( j=0; j<n; j++ ) {
63         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
64         if (idxn[j] >= mdn->N) {
65           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
66         }
67         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr);
68       }
69     } else {
70       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
71     }
72   }
73   PetscFunctionReturn(0);
74 }
75 
76 #undef __FUNC__
77 #define __FUNC__ "MatGetArray_MPIDense"
78 int MatGetArray_MPIDense(Mat A,Scalar **array)
79 {
80   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
81   int          ierr;
82 
83   PetscFunctionBegin;
84   ierr = MatGetArray(a->A,array); CHKERRQ(ierr);
85   PetscFunctionReturn(0);
86 }
87 
88 #undef __FUNC__
89 #define __FUNC__ "MatRestoreArray_MPIDense"
90 int MatRestoreArray_MPIDense(Mat A,Scalar **array)
91 {
92   PetscFunctionBegin;
93   PetscFunctionReturn(0);
94 }
95 
96 #undef __FUNC__
97 #define __FUNC__ "MatAssemblyBegin_MPIDense"
98 int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
99 {
100   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
101   MPI_Comm     comm = mat->comm;
102   int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
103   int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
104   int          tag = mat->tag, *owner,*starts,count,ierr;
105   InsertMode   addv;
106   MPI_Request  *send_waits,*recv_waits;
107   Scalar       *rvalues,*svalues;
108 
109   PetscFunctionBegin;
110   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
111   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
112   if (addv == (ADD_VALUES|INSERT_VALUES)) {
113     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
114   }
115   mat->insertmode = addv; /* in case this processor had no cache */
116 
117   /*  first count number of contributors to each processor */
118   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
119   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
120   owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
121   for ( i=0; i<mdn->stash.n; i++ ) {
122     idx = mdn->stash.idx[i];
123     for ( j=0; j<size; j++ ) {
124       if (idx >= owners[j] && idx < owners[j+1]) {
125         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
126       }
127     }
128   }
129   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
130 
131   /* inform other processors of number of messages and max length */
132   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
133   ierr = MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
134   nreceives = work[rank];
135   if (nreceives > size) SETERRQ(PETSC_ERR_PLIB,0,"Internal PETSc error");
136   ierr = MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
137   nmax = work[rank];
138   PetscFree(work);
139 
140   /* post receives:
141        1) each message will consist of ordered triples
142      (global row index, global column index, value); we store the indices
143      as Scalars to simplify the message passing.
144        2) since we don't know how long each individual message is we
145      allocate the largest needed buffer for each receive. Potentially
146      this is a lot of wasted space.
147 
148        This could be done better.
149   */
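      /*
         A small illustration of the packing done below (the numbers are hypothetical):
         a stashed entry with global row 7, global column 2, and value 3.5 destined for
         processor p = owner[i] is packed into svalues as three consecutive Scalars at
         offset k = starts[p]:
             svalues[3*k]   = 7.0    (global row index, cast to Scalar)
             svalues[3*k+1] = 2.0    (global column index, cast to Scalar)
             svalues[3*k+2] = 3.5    (the value itself)
      */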
150   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
151   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
152   for ( i=0; i<nreceives; i++ ) {
153     ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
154   }
155 
156   /* do sends:
157       1) starts[i] gives the starting index in svalues for stuff going to
158          the ith processor
159   */
160   svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
161   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
162   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
163   starts[0] = 0;
164   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
165   for ( i=0; i<mdn->stash.n; i++ ) {
166     svalues[3*starts[owner[i]]]       = (Scalar)  mdn->stash.idx[i];
167     svalues[3*starts[owner[i]]+1]     = (Scalar)  mdn->stash.idy[i];
168     svalues[3*(starts[owner[i]]++)+2] =  mdn->stash.array[i];
169   }
170   PetscFree(owner);
171   starts[0] = 0;
172   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
173   count = 0;
174   for ( i=0; i<size; i++ ) {
175     if (procs[i]) {
176       ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
177     }
178   }
179   PetscFree(starts); PetscFree(nprocs);
180 
181   /* Free cache space */
182   PLogInfo(mat,"MatAssemblyBegin_MPIDense:Number of off-processor values %d\n",mdn->stash.n);
183   ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);
184 
185   mdn->svalues    = svalues;    mdn->rvalues = rvalues;
186   mdn->nsends     = nsends;     mdn->nrecvs = nreceives;
187   mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
188   mdn->rmax       = nmax;
189 
190   PetscFunctionReturn(0);
191 }
192 extern int MatSetUpMultiply_MPIDense(Mat);
193 
194 #undef __FUNC__
195 #define __FUNC__ "MatAssemblyEnd_MPIDense"
196 int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
197 {
198   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
199   MPI_Status   *send_status,recv_status;
200   int          imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col;
201   Scalar       *values,val;
202   InsertMode   addv = mat->insertmode;
203 
204   PetscFunctionBegin;
205   /*  wait on receives */
206   while (count) {
207     ierr = MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
208     /* unpack receives into our local space */
209     values = mdn->rvalues + 3*imdex*mdn->rmax;
210     ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
211     n = n/3;
212     for ( i=0; i<n; i++ ) {
213       row = (int) PetscReal(values[3*i]) - mdn->rstart;
214       col = (int) PetscReal(values[3*i+1]);
215       val = values[3*i+2];
216       if (col >= 0 && col < mdn->N) {
217         MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
218       }
219       else {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid column");}
220     }
221     count--;
222   }
223   PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues);
224 
225   /* wait on sends */
226   if (mdn->nsends) {
227     send_status = (MPI_Status *) PetscMalloc(mdn->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
228     ierr        = MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);CHKERRQ(ierr);
229     PetscFree(send_status);
230   }
231   PetscFree(mdn->send_waits); PetscFree(mdn->svalues);
232 
233   ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
234   ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);
235 
236   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
237     ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
238   }
239   PetscFunctionReturn(0);
240 }
241 
242 #undef __FUNC__
243 #define __FUNC__ "MatZeroEntries_MPIDense"
244 int MatZeroEntries_MPIDense(Mat A)
245 {
246   int          ierr;
247   Mat_MPIDense *l = (Mat_MPIDense *) A->data;
248 
249   PetscFunctionBegin;
250   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
251   PetscFunctionReturn(0);
252 }
253 
254 #undef __FUNC__
255 #define __FUNC__ "MatGetBlockSize_MPIDense"
256 int MatGetBlockSize_MPIDense(Mat A,int *bs)
257 {
258   PetscFunctionBegin;
259   *bs = 1;
260   PetscFunctionReturn(0);
261 }
262 
263 /* the code does not do the diagonal entries correctly unless the
264    matrix is square and the column and row ownerships are identical.
265    This is a BUG. The only way to fix it seems to be to access
266    mdn->A and mdn->B directly and not through the MatZeroRows()
267    routine.
268 */
269 #undef __FUNC__
270 #define __FUNC__ "MatZeroRows_MPIDense"
271 int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
272 {
273   Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
274   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
275   int            *procs,*nprocs,j,found,idx,nsends,*work;
276   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
277   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
278   int            *lens,imdex,*lrows,*values;
279   MPI_Comm       comm = A->comm;
280   MPI_Request    *send_waits,*recv_waits;
281   MPI_Status     recv_status,*send_status;
282   IS             istmp;
283 
284   PetscFunctionBegin;
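      /*
         Overall flow (a summary of the code below): each processor determines which
         processor owns each row it wants zeroed, ships those row indices to the
         owners, collects the indices shipped to it, and finally calls MatZeroRows()
         on its local sequential matrix with the received (local) row numbers.
      */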
285   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
286   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
287 
288   /*  first count number of contributors to each processor */
289   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
290   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
291   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
292   for ( i=0; i<N; i++ ) {
293     idx = rows[i];
294     found = 0;
295     for ( j=0; j<size; j++ ) {
296       if (idx >= owners[j] && idx < owners[j+1]) {
297         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
298       }
299     }
300     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
301   }
302   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
303 
304   /* inform other processors of number of messages and max length */
305   work   = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
306   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
307   nrecvs = work[rank];
308   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
309   nmax   = work[rank];
310   PetscFree(work);
311 
312   /* post receives:   */
313   rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
314   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
315   for ( i=0; i<nrecvs; i++ ) {
316     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
317   }
318 
319   /* do sends:
320       1) starts[i] gives the starting index in svalues for stuff going to
321          the ith processor
322   */
323   svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
324   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
325   starts     = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
326   starts[0]  = 0;
327   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
328   for ( i=0; i<N; i++ ) {
329     svalues[starts[owner[i]]++] = rows[i];
330   }
331   ISRestoreIndices(is,&rows);
332 
333   starts[0] = 0;
334   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
335   count = 0;
336   for ( i=0; i<size; i++ ) {
337     if (procs[i]) {
338       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
339     }
340   }
341   PetscFree(starts);
342 
343   base = owners[rank];
344 
345   /*  wait on receives */
346   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
347   source = lens + nrecvs;
348   count  = nrecvs; slen = 0;
349   while (count) {
350     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
351     /* unpack receives into our local space */
352     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
353     source[imdex]  = recv_status.MPI_SOURCE;
354     lens[imdex]  = n;
355     slen += n;
356     count--;
357   }
358   PetscFree(recv_waits);
359 
360   /* move the data into the send scatter */
361   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
362   count = 0;
363   for ( i=0; i<nrecvs; i++ ) {
364     values = rvalues + i*nmax;
365     for ( j=0; j<lens[i]; j++ ) {
366       lrows[count++] = values[j] - base;
367     }
368   }
369   PetscFree(rvalues); PetscFree(lens);
370   PetscFree(owner); PetscFree(nprocs);
371 
372   /* actually zap the local rows */
373   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
374   PLogObjectParent(A,istmp);
375   PetscFree(lrows);
376   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
377   ierr = ISDestroy(istmp); CHKERRQ(ierr);
378 
379   /* wait on sends */
380   if (nsends) {
381     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
382     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
383     PetscFree(send_status);
384   }
385   PetscFree(send_waits); PetscFree(svalues);
386 
387   PetscFunctionReturn(0);
388 }
389 
390 #undef __FUNC__
391 #define __FUNC__ "MatMult_MPIDense"
392 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
393 {
394   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
395   int          ierr;
396 
397   PetscFunctionBegin;
398   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
399   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
400   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
401   PetscFunctionReturn(0);
402 }
403 
404 #undef __FUNC__
405 #define __FUNC__ "MatMultAdd_MPIDense"
406 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
407 {
408   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
409   int          ierr;
410 
411   PetscFunctionBegin;
412   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
413   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
414   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
415   PetscFunctionReturn(0);
416 }
417 
418 #undef __FUNC__
419 #define __FUNC__ "MatMultTrans_MPIDense"
420 int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
421 {
422   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
423   int          ierr;
424   Scalar       zero = 0.0;
425 
426   PetscFunctionBegin;
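      /*
         yy = A^T xx (a sketch of the steps below): zero yy, apply the transpose of the
         local row block to the local part of xx to obtain a full-length partial result
         in a->lvec, then sum the partial results from all processors into the
         distributed yy with a reverse scatter-add.
      */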
427   ierr = VecSet(&zero,yy); CHKERRQ(ierr);
428   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
429   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
430   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
431   PetscFunctionReturn(0);
432 }
433 
434 #undef __FUNC__
435 #define __FUNC__ "MatMultTransAdd_MPIDense"
436 int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
437 {
438   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
439   int          ierr;
440 
441   PetscFunctionBegin;
442   ierr = VecCopy(yy,zz); CHKERRQ(ierr);
443   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
444   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
445   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
446   PetscFunctionReturn(0);
447 }
448 
449 #undef __FUNC__
450 #define __FUNC__ "MatGetDiagonal_MPIDense"
451 int MatGetDiagonal_MPIDense(Mat A,Vec v)
452 {
453   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
454   Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
455   int          ierr, len, i, n, m = a->m, radd;
456   Scalar       *x, zero = 0.0;
457 
458   PetscFunctionBegin;
459   VecSet(&zero,v);
460   ierr = VecGetArray(v,&x); CHKERRQ(ierr);
461   ierr = VecGetSize(v,&n); CHKERRQ(ierr);
462   if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
463   len = PetscMin(aloc->m,aloc->n);
464   radd = a->rstart*m;
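      /* The local block aloc->v is stored by columns (Fortran order) with leading
         dimension m, so the diagonal entry (local row i, global column rstart+i)
         sits at v[(rstart+i)*m + i] = v[radd + i*m + i]. */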
465   for ( i=0; i<len; i++ ) {
466     x[i] = aloc->v[radd + i*m + i];
467   }
468   PetscFunctionReturn(0);
469 }
470 
471 #undef __FUNC__
472 #define __FUNC__ "MatDestroy_MPIDense"
473 int MatDestroy_MPIDense(Mat mat)
474 {
475   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
476   int          ierr;
477 
478   PetscFunctionBegin;
479   if (--mat->refct > 0) PetscFunctionReturn(0);
480 
481   if (mat->mapping) {
482     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
483   }
484   if (mat->bmapping) {
485     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
486   }
487 #if defined(USE_PETSC_LOG)
488   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
489 #endif
490   PetscFree(mdn->rowners);
491   ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
492   if (mdn->lvec)   VecDestroy(mdn->lvec);
493   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
494   if (mdn->factor) {
495     if (mdn->factor->temp)   PetscFree(mdn->factor->temp);
496     if (mdn->factor->tag)    PetscFree(mdn->factor->tag);
497     if (mdn->factor->pivots) PetscFree(mdn->factor->pivots);
498     PetscFree(mdn->factor);
499   }
500   PetscFree(mdn);
501   if (mat->rmap) {
502     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
503   }
504   if (mat->cmap) {
505     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
506   }
507   PLogObjectDestroy(mat);
508   PetscHeaderDestroy(mat);
509   PetscFunctionReturn(0);
510 }
511 
512 #undef __FUNC__
513 #define __FUNC__ "MatView_MPIDense_Binary"
514 static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
515 {
516   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
517   int          ierr;
518 
519   PetscFunctionBegin;
520   if (mdn->size == 1) {
521     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
522   }
523   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
524   PetscFunctionReturn(0);
525 }
526 
527 #undef __FUNC__
528 #define __FUNC__ "MatView_MPIDense_ASCII"
529 static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
530 {
531   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
532   int          ierr, format, size = mdn->size, rank = mdn->rank;
533   FILE         *fd;
534   ViewerType   vtype;
535 
536   PetscFunctionBegin;
537   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
538   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
539   ierr = ViewerGetFormat(viewer,&format); CHKERRQ(ierr);
540   if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
541     MatInfo info;
542     ierr = MatGetInfo(mat,MAT_LOCAL,&info); CHKERRQ(ierr);
543     PetscSequentialPhaseBegin(mat->comm,1);
544       fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
545          (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
546       fflush(fd);
547     PetscSequentialPhaseEnd(mat->comm,1);
548     ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
549     PetscFunctionReturn(0);
550   }
551   else if (format == VIEWER_FORMAT_ASCII_INFO) {
552     PetscFunctionReturn(0);
553   }
554 
555   if (size == 1) {
556     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
557   } else {
558     /* assemble the entire matrix onto first processor. */
559     Mat          A;
560     int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
561     Scalar       *vals;
562     Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;
563 
564     if (!rank) {
565       ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
566     } else {
567       ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
568     }
569     PLogObjectParent(mat,A);
570 
571     /* Copy the matrix ... This isn't the most efficient means,
572        but it's quick for now */
573     row = mdn->rstart; m = Amdn->m;
574     for ( i=0; i<m; i++ ) {
575       ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
576       ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
577       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
578       row++;
579     }
580 
581     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
582     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
583     if (!rank) {
584       ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
585     }
586     ierr = MatDestroy(A); CHKERRQ(ierr);
587   }
588   PetscFunctionReturn(0);
589 }
590 
591 #undef __FUNC__
592 #define __FUNC__ "MatView_MPIDense"
593 int MatView_MPIDense(Mat mat,Viewer viewer)
594 {
595   int          ierr;
596   ViewerType   vtype;
597 
      PetscFunctionBegin;
598   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
599   if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
600     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
601   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
602     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
603   } else {
604     SETERRQ(1,1,"Viewer type not supported by PETSc object");
605   }
606   PetscFunctionReturn(0);
607 }
608 
609 #undef __FUNC__
610 #define __FUNC__ "MatGetInfo_MPIDense"
611 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
612 {
613   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
614   Mat          mdn = mat->A;
615   int          ierr;
616   double       isend[5], irecv[5];
617 
618   PetscFunctionBegin;
619   info->rows_global    = (double)mat->M;
620   info->columns_global = (double)mat->N;
621   info->rows_local     = (double)mat->m;
622   info->columns_local  = (double)mat->N;
623   info->block_size     = 1.0;
624   ierr = MatGetInfo(mdn,MAT_LOCAL,info); CHKERRQ(ierr);
625   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
626   isend[3] = info->memory;  isend[4] = info->mallocs;
627   if (flag == MAT_LOCAL) {
628     info->nz_used      = isend[0];
629     info->nz_allocated = isend[1];
630     info->nz_unneeded  = isend[2];
631     info->memory       = isend[3];
632     info->mallocs      = isend[4];
633   } else if (flag == MAT_GLOBAL_MAX) {
634     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
635     info->nz_used      = irecv[0];
636     info->nz_allocated = irecv[1];
637     info->nz_unneeded  = irecv[2];
638     info->memory       = irecv[3];
639     info->mallocs      = irecv[4];
640   } else if (flag == MAT_GLOBAL_SUM) {
641     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
642     info->nz_used      = irecv[0];
643     info->nz_allocated = irecv[1];
644     info->nz_unneeded  = irecv[2];
645     info->memory       = irecv[3];
646     info->mallocs      = irecv[4];
647   }
648   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
649   info->fill_ratio_needed = 0;
650   info->factor_mallocs    = 0;
651   PetscFunctionReturn(0);
652 }
653 
654 /* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
655    extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
656    extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
657    extern int MatSolve_MPIDense(Mat,Vec,Vec);
658    extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
659    extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
660    extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */
661 
662 #undef __FUNC__
663 #define __FUNC__ "MatSetOption_MPIDense"
664 int MatSetOption_MPIDense(Mat A,MatOption op)
665 {
666   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
667 
668   PetscFunctionBegin;
669   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
670       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
671       op == MAT_NEW_NONZERO_LOCATION_ERROR ||
672       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
673       op == MAT_COLUMNS_SORTED ||
674       op == MAT_COLUMNS_UNSORTED) {
675         MatSetOption(a->A,op);
676   } else if (op == MAT_ROW_ORIENTED) {
677         a->roworiented = 1;
678         MatSetOption(a->A,op);
679   } else if (op == MAT_ROWS_SORTED ||
680              op == MAT_ROWS_UNSORTED ||
681              op == MAT_SYMMETRIC ||
682              op == MAT_STRUCTURALLY_SYMMETRIC ||
683              op == MAT_YES_NEW_DIAGONALS ||
684              op == MAT_USE_HASH_TABLE) {
685     PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
686   } else if (op == MAT_COLUMN_ORIENTED) {
687     a->roworiented = 0; MatSetOption(a->A,op);
688   } else if (op == MAT_NO_NEW_DIAGONALS) {
689     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
690   } else {
691     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
692   }
693   PetscFunctionReturn(0);
694 }
695 
696 #undef __FUNC__
697 #define __FUNC__ "MatGetSize_MPIDense"
698 int MatGetSize_MPIDense(Mat A,int *m,int *n)
699 {
700   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
701 
702   PetscFunctionBegin;
703   *m = mat->M; *n = mat->N;
704   PetscFunctionReturn(0);
705 }
706 
707 #undef __FUNC__
708 #define __FUNC__ "MatGetLocalSize_MPIDense"
709 int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
710 {
711   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
712 
713   PetscFunctionBegin;
714   *m = mat->m; *n = mat->N;
715   PetscFunctionReturn(0);
716 }
717 
718 #undef __FUNC__
719 #define __FUNC__ "MatGetOwnershipRange_MPIDense"
720 int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
721 {
722   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
723 
724   PetscFunctionBegin;
725   *m = mat->rstart; *n = mat->rend;
726   PetscFunctionReturn(0);
727 }
728 
729 #undef __FUNC__
730 #define __FUNC__ "MatGetRow_MPIDense"
731 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
732 {
733   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
734   int          lrow, rstart = mat->rstart, rend = mat->rend,ierr;
735 
736   PetscFunctionBegin;
737   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows");
738   lrow = row - rstart;
739   ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNC__
744 #define __FUNC__ "MatRestoreRow_MPIDense"
745 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
746 {
747   PetscFunctionBegin;
748   if (idx) PetscFree(*idx);
749   if (v) PetscFree(*v);
750   PetscFunctionReturn(0);
751 }
752 
753 #undef __FUNC__
754 #define __FUNC__ "MatNorm_MPIDense"
755 int MatNorm_MPIDense(Mat A,NormType type,double *norm)
756 {
757   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
758   Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
759   int          ierr, i, j;
760   double       sum = 0.0;
761   Scalar       *v = mat->v;
762 
763   PetscFunctionBegin;
764   if (mdn->size == 1) {
765     ierr =  MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
766   } else {
767     if (type == NORM_FROBENIUS) {
768       for (i=0; i<mat->n*mat->m; i++ ) {
769 #if defined(USE_PETSC_COMPLEX)
770         sum += PetscReal(PetscConj(*v)*(*v)); v++;
771 #else
772         sum += (*v)*(*v); v++;
773 #endif
774       }
775       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
776       *norm = sqrt(*norm);
777       PLogFlops(2*mat->n*mat->m);
778     } else if (type == NORM_1) {
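          /* 1-norm = max column sum: accumulate per-column sums of absolute values
             locally in tmp, add the contributions from all processors, then take
             the maximum over the columns */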
779       double *tmp, *tmp2;
780       tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
781       tmp2 = tmp + mdn->N;
782       PetscMemzero(tmp,2*mdn->N*sizeof(double));
783       *norm = 0.0;
784       v = mat->v;
785       for ( j=0; j<mat->n; j++ ) {
786         for ( i=0; i<mat->m; i++ ) {
787           tmp[j] += PetscAbsScalar(*v);  v++;
788         }
789       }
790       ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
791       for ( j=0; j<mdn->N; j++ ) {
792         if (tmp2[j] > *norm) *norm = tmp2[j];
793       }
794       PetscFree(tmp);
795       PLogFlops(mat->n*mat->m);
796     } else if (type == NORM_INFINITY) { /* max row norm */
797       double ntemp;
798       ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
799       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
800     } else {
801       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
802     }
803   }
804   PetscFunctionReturn(0);
805 }
806 
807 #undef __FUNC__
808 #define __FUNC__ "MatTranspose_MPIDense"
809 int MatTranspose_MPIDense(Mat A,Mat *matout)
810 {
811   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
812   Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
813   Mat          B;
814   int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
815   int          j, i, ierr;
816   Scalar       *v;
817 
818   PetscFunctionBegin;
819   if (matout == PETSC_NULL && M != N) {
820     SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place");
821   }
822   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);
823 
824   m = Aloc->m; n = Aloc->n; v = Aloc->v;
825   rwork = (int *) PetscMalloc(m*sizeof(int)); CHKPTRQ(rwork);
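      /* column j of the local block becomes (part of) row j of the transpose:
         its m entries land in global columns rstart .. rstart+m-1 of B */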
826   for ( j=0; j<n; j++ ) {
827     for (i=0; i<m; i++) rwork[i] = rstart + i;
828     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
829     v   += m;
830   }
831   PetscFree(rwork);
832   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
833   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
834   if (matout != PETSC_NULL) {
835     *matout = B;
836   } else {
837     PetscOps *Abops;
838     MatOps   Aops;
839 
840     /* This isn't really an in-place transpose; we simply free the data structures from a */
841     PetscFree(a->rowners);
842     ierr = MatDestroy(a->A); CHKERRQ(ierr);
843     if (a->lvec) VecDestroy(a->lvec);
844     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
845     PetscFree(a);
846 
847     /*
848          This is horrible, horrible code. We need to keep the
849       A pointers for the bops and ops but copy everything
850       else from B.
851     */
852     Abops = A->bops;
853     Aops  = A->ops;
854     PetscMemcpy(A,B,sizeof(struct _p_Mat));
855     A->bops = Abops;
856     A->ops  = Aops;
857 
858     PetscHeaderDestroy(B);
859   }
860   PetscFunctionReturn(0);
861 }
862 
863 #include "pinclude/blaslapack.h"
864 #undef __FUNC__
865 #define __FUNC__ "MatScale_MPIDense"
866 int MatScale_MPIDense(Scalar *alpha,Mat inA)
867 {
868   Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
869   Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
870   int          one = 1, nz;
871 
872   PetscFunctionBegin;
873   nz = a->m*a->n;
874   BLscal_( &nz, alpha, a->v, &one );
875   PLogFlops(nz);
876   PetscFunctionReturn(0);
877 }
878 
879 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
880 extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);
881 
882 /* -------------------------------------------------------------------*/
883 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
884        MatGetRow_MPIDense,
885        MatRestoreRow_MPIDense,
886        MatMult_MPIDense,
887        MatMultAdd_MPIDense,
888        MatMultTrans_MPIDense,
889        MatMultTransAdd_MPIDense,
890        0,
891        0,
892        0,
893        0,
894        0,
895        0,
896        0,
897        MatTranspose_MPIDense,
898        MatGetInfo_MPIDense,0,
899        MatGetDiagonal_MPIDense,
900        0,
901        MatNorm_MPIDense,
902        MatAssemblyBegin_MPIDense,
903        MatAssemblyEnd_MPIDense,
904        0,
905        MatSetOption_MPIDense,
906        MatZeroEntries_MPIDense,
907        MatZeroRows_MPIDense,
908        0,
909        0,
910        0,
911        0,
912        MatGetSize_MPIDense,
913        MatGetLocalSize_MPIDense,
914        MatGetOwnershipRange_MPIDense,
915        0,
916        0,
917        MatGetArray_MPIDense,
918        MatRestoreArray_MPIDense,
919        MatDuplicate_MPIDense,
920        0,
921        0,
922        0,
923        0,
924        0,
925        MatGetSubMatrices_MPIDense,
926        0,
927        MatGetValues_MPIDense,
928        0,
929        0,
930        MatScale_MPIDense,
931        0,
932        0,
933        0,
934        MatGetBlockSize_MPIDense,
935        0,
936        0,
937        0,
938        0,
939        0,
940        0,
941        0,
942        0,
943        0,
944        0,
945        0,
946        0,
947        MatGetMaps_Petsc};
948 
949 #undef __FUNC__
950 #define __FUNC__ "MatCreateMPIDense"
951 /*@C
952    MatCreateMPIDense - Creates a parallel matrix in dense format.
953 
954    Collective on MPI_Comm
955 
956    Input Parameters:
957 +  comm - MPI communicator
958 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
959 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
960 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
961 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
962 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
963    to control all matrix memory allocation.
964 
965    Output Parameter:
966 .  A - the matrix
967 
968    Notes:
969    The dense format is fully compatible with standard Fortran 77
970    storage by columns.
971 
972    The data input variable is intended primarily for Fortran programmers
973    who wish to allocate their own matrix memory space.  Most users should
974    set data=PETSC_NULL.
975 
976    The user MUST specify either the local or global matrix dimensions
977    (possibly both).
978 
979    Currently, the only parallel dense matrix decomposition is by rows,
980    so that n=N and each submatrix owns all of the global columns.
981 
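   Example usage (a minimal sketch; error handling via CHKERRQ() is omitted and
   the dimensions are purely illustrative):

      Mat    A;
      int    ierr, row = 0, col = 0;
      Scalar one = 1.0;
      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,10,10,PETSC_NULL,&A);
      ierr = MatSetValues(A,1,&row,1,&col,&one,INSERT_VALUES);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
      ierr = MatDestroy(A);
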
982 .keywords: matrix, dense, parallel
983 
984 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
985 @*/
986 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
987 {
988   Mat          mat;
989   Mat_MPIDense *a;
990   int          ierr, i,flg;
991 
992   PetscFunctionBegin;
993   /* Note:  For now, when data is specified above, this assumes the user correctly
994    allocates the local dense storage space.  We should add error checking. */
995 
996   *A = 0;
997   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView);
998   PLogObjectCreate(mat);
999   mat->data       = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
1000   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1001   mat->ops->destroy    = MatDestroy_MPIDense;
1002   mat->ops->view       = MatView_MPIDense;
1003   mat->factor          = 0;
1004   mat->mapping         = 0;
1005 
1006   a->factor       = 0;
1007   mat->insertmode = NOT_SET_VALUES;
1008   MPI_Comm_rank(comm,&a->rank);
1009   MPI_Comm_size(comm,&a->size);
1010 
1011   if (M == PETSC_DECIDE) {ierr = MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);}
1012   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
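      /* e.g., with M = 10 global rows on 3 processors this gives local row counts 4, 3, 3 */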
1013 
1014   /*
1015      The computation of n below is wrong; n should represent the number of local
1016      rows in the right-hand side (column) vector.
1017   */
1018 
1019   /* each row stores all columns */
1020   if (N == PETSC_DECIDE) N = n;
1021   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
1022   /*  if (n != N) SETERRQ(PETSC_ERR_SUP,0,"For now, only n=N is supported"); */
1023   a->N = mat->N = N;
1024   a->M = mat->M = M;
1025   a->m = mat->m = m;
1026   a->n = mat->n = n;
1027 
1028   /* the information in the maps duplicates the information computed below, eventually
1029      we should remove the duplicate information that is not contained in the maps */
1030   ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr);
1031   ierr = MapCreateMPI(comm,n,N,&mat->cmap);CHKERRQ(ierr);
1032 
1033   /* build local table of row and column ownerships */
1034   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1035   a->cowners = a->rowners + a->size + 1;
1036   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1037   ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1038   a->rowners[0] = 0;
1039   for ( i=2; i<=a->size; i++ ) {
1040     a->rowners[i] += a->rowners[i-1];
1041   }
1042   a->rstart = a->rowners[a->rank];
1043   a->rend   = a->rowners[a->rank+1];
1044   ierr      = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1045   a->cowners[0] = 0;
1046   for ( i=2; i<=a->size; i++ ) {
1047     a->cowners[i] += a->cowners[i-1];
1048   }
1049 
1050   ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr);
1051   PLogObjectParent(mat,a->A);
1052 
1053   /* build cache for off array entries formed */
1054   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
1055 
1056   /* stuff used for matrix vector multiply */
1057   a->lvec        = 0;
1058   a->Mvctx       = 0;
1059   a->roworiented = 1;
1060 
1061   *A = mat;
1062   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1063   if (flg) {
1064     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1065   }
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 #undef __FUNC__
1070 #define __FUNC__ "MatDuplicate_MPIDense"
1071 static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1072 {
1073   Mat          mat;
1074   Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
1075   int          ierr;
1076   FactorCtx    *factor;
1077 
1078   PetscFunctionBegin;
1079   *newmat       = 0;
1080   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView);
1081   PLogObjectCreate(mat);
1082   mat->data      = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
1083   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1084   mat->ops->destroy   = MatDestroy_MPIDense;
1085   mat->ops->view      = MatView_MPIDense;
1086   mat->factor         = A->factor;
1087   mat->assembled      = PETSC_TRUE;
1088 
1089   a->m = mat->m = oldmat->m;
1090   a->n = mat->n = oldmat->n;
1091   a->M = mat->M = oldmat->M;
1092   a->N = mat->N = oldmat->N;
1093   if (oldmat->factor) {
1094     a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor);
1095     /* copy factor contents ... add this code! */
1096   } else a->factor = 0;
1097 
1098   a->rstart       = oldmat->rstart;
1099   a->rend         = oldmat->rend;
1100   a->size         = oldmat->size;
1101   a->rank         = oldmat->rank;
1102   mat->insertmode = NOT_SET_VALUES;
1103 
1104   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
1105   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1106   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1107   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1108 
1109   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1110   PLogObjectParent(mat,a->lvec);
1111   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1112   PLogObjectParent(mat,a->Mvctx);
1113   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr);
1114   PLogObjectParent(mat,a->A);
1115   *newmat = mat;
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 #include "sys.h"
1120 
1121 #undef __FUNC__
1122 #define __FUNC__ "MatLoad_MPIDense_DenseInFile"
1123 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
1124 {
1125   int        *rowners, i,size,rank,m,ierr,nz,j;
1126   Scalar     *array,*vals,*vals_ptr;
1127   MPI_Status status;
1128 
1129   PetscFunctionBegin;
1130   MPI_Comm_rank(comm,&rank);
1131   MPI_Comm_size(comm,&size);
1132 
1133   /* determine ownership of all rows */
1134   m          = M/size + ((M % size) > rank);
1135   rowners    = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1136   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1137   rowners[0] = 0;
1138   for ( i=2; i<=size; i++ ) {
1139     rowners[i] += rowners[i-1];
1140   }
1141 
1142   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1143   ierr = MatGetArray(*newmat,&array); CHKERRQ(ierr);
1144 
1145   if (!rank) {
1146     vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);
1147 
1148     /* read in my part of the matrix numerical values  */
1149     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR); CHKERRQ(ierr);
1150 
1151     /* insert into matrix row by row (this is why we cannot read directly into array) */
1152     vals_ptr = vals;
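        /* vals holds this processor's rows as read from the file (row by row), while
           array is the column-major local block with leading dimension m, hence the
           strided store below */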
1153     for ( i=0; i<m; i++ ) {
1154       for ( j=0; j<N; j++ ) {
1155         array[i + j*m] = *vals_ptr++;
1156       }
1157     }
1158 
1159     /* read in the other processors' parts and ship them out */
1160     for ( i=1; i<size; i++ ) {
1161       nz   = (rowners[i+1] - rowners[i])*N;
1162       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1163       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1164     }
1165   } else {
1166     /* receive numeric values */
1167     vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);
1168 
1169     /* receive message of values */
1170     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1171 
1172     /* insert into matrix row by row (this is why we cannot read directly into array) */
1173     vals_ptr = vals;
1174     for ( i=0; i<m; i++ ) {
1175       for ( j=0; j<N; j++ ) {
1176         array[i + j*m] = *vals_ptr++;
1177       }
1178     }
1179   }
1180   PetscFree(rowners);
1181   PetscFree(vals);
1182   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1183   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 
1188 #undef __FUNC__
1189 #define __FUNC__ "MatLoad_MPIDense"
1190 int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
1191 {
1192   Mat          A;
1193   Scalar       *vals,*svals;
1194   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1195   MPI_Status   status;
1196   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1197   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1198   int          tag = ((PetscObject)viewer)->tag;
1199   int          i, nz, ierr, j,rstart, rend, fd;
1200 
1201   PetscFunctionBegin;
1202   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1203   if (!rank) {
1204     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1205     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
1206     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1207   }
1208 
1209   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1210   M = header[1]; N = header[2]; nz = header[3];
1211 
1212   /*
1213        Handle case where matrix is stored on disk as a dense matrix
1214   */
1215   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1216     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1217     PetscFunctionReturn(0);
1218   }
1219 
1220   /* determine ownership of all rows */
1221   m          = M/size + ((M % size) > rank);
1222   rowners    = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1223   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1224   rowners[0] = 0;
1225   for ( i=2; i<=size; i++ ) {
1226     rowners[i] += rowners[i-1];
1227   }
1228   rstart = rowners[rank];
1229   rend   = rowners[rank+1];
1230 
1231   /* distribute row lengths to all processors */
1232   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1233   offlens = ourlens + (rend-rstart);
1234   if (!rank) {
1235     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1236     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
1237     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1238     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1239     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1240     PetscFree(sndcounts);
1241   } else {
1242     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
1243   }
1244 
1245   if (!rank) {
1246     /* calculate the number of nonzeros on each processor */
1247     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1248     PetscMemzero(procsnz,size*sizeof(int));
1249     for ( i=0; i<size; i++ ) {
1250       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1251         procsnz[i] += rowlengths[j];
1252       }
1253     }
1254     PetscFree(rowlengths);
1255 
1256     /* determine max buffer needed and allocate it */
1257     maxnz = 0;
1258     for ( i=0; i<size; i++ ) {
1259       maxnz = PetscMax(maxnz,procsnz[i]);
1260     }
1261     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1262 
1263     /* read in my part of the matrix column indices  */
1264     nz = procsnz[0];
1265     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1266     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
1267 
1268     /* read in everyone else's part and ship it off */
1269     for ( i=1; i<size; i++ ) {
1270       nz   = procsnz[i];
1271       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
1272       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1273     }
1274     PetscFree(cols);
1275   } else {
1276     /* determine buffer space needed for message */
1277     nz = 0;
1278     for ( i=0; i<m; i++ ) {
1279       nz += ourlens[i];
1280     }
1281     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1282 
1283     /* receive message of column indices*/
1284     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1285     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1286     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with the file");
1287   }
1288 
1289   /* loop over local rows, determining number of off-diagonal entries */
1290   PetscMemzero(offlens,m*sizeof(int));
1291   jj = 0;
1292   for ( i=0; i<m; i++ ) {
1293     for ( j=0; j<ourlens[i]; j++ ) {
1294       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1295       jj++;
1296     }
1297   }
1298 
1299   /* create our matrix */
1300   for ( i=0; i<m; i++ ) {
1301     ourlens[i] -= offlens[i];
1302   }
1303   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1304   A = *newmat;
1305   for ( i=0; i<m; i++ ) {
1306     ourlens[i] += offlens[i];
1307   }
1308 
1309   if (!rank) {
1310     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1311 
1312     /* read in my part of the matrix numerical values  */
1313     nz = procsnz[0];
1314     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1315 
1316     /* insert into matrix */
1317     jj      = rstart;
1318     smycols = mycols;
1319     svals   = vals;
1320     for ( i=0; i<m; i++ ) {
1321       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1322       smycols += ourlens[i];
1323       svals   += ourlens[i];
1324       jj++;
1325     }
1326 
1327     /* read in the other processors' parts and ship them out */
1328     for ( i=1; i<size; i++ ) {
1329       nz   = procsnz[i];
1330       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1331       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1332     }
1333     PetscFree(procsnz);
1334   } else {
1335     /* receive numeric values */
1336     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1337 
1338     /* receive message of values */
1339     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1340     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1341     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with the file");
1342 
1343     /* insert into matrix */
1344     jj      = rstart;
1345     smycols = mycols;
1346     svals   = vals;
1347     for ( i=0; i<m; i++ ) {
1348       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1349       smycols += ourlens[i];
1350       svals   += ourlens[i];
1351       jj++;
1352     }
1353   }
1354   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1355 
1356   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1357   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1358   PetscFunctionReturn(0);
1359 }
1360 
1361 
1362 
1363 
1364 
1365