1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpidense.c,v 1.109 1999/03/16 21:10:18 balay Exp balay $";
3 #endif
4 
5 /*
6    Basic functions for parallel dense matrices.
7 */
8 
9 #include "src/mat/impls/dense/mpi/mpidense.h"
10 #include "src/vec/vecimpl.h"
11 
12 extern int MatSetUpMultiply_MPIDense(Mat);
13 
14 #undef __FUNC__
15 #define __FUNC__ "MatSetValues_MPIDense"
16 int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
17 {
18   Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
19   int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
20   int          roworiented = A->roworiented;
21 
22   PetscFunctionBegin;
23   for ( i=0; i<m; i++ ) {
24     if (idxm[i] < 0) continue;
25     if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
26     if (idxm[i] >= rstart && idxm[i] < rend) {
27       row = idxm[i] - rstart;
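      /* v is interpreted according to the orientation option: in row-oriented
         mode the n values for global row idxm[i] are contiguous at v+i*n, so a
         whole row can be passed to the local MatSetValues(); in column-oriented
         mode the value for (idxm[i],idxn[j]) lives at v[i+j*m], so the entries
         are inserted one at a time below */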
28       if (roworiented) {
29         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr);
30       } else {
31         for ( j=0; j<n; j++ ) {
32           if (idxn[j] < 0) continue;
33           if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
34           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
35         }
36       }
37     } else {
38       if (roworiented) {
39         ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n); CHKERRQ(ierr);
40       } else {
41         ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
42       }
43     }
44   }
45   PetscFunctionReturn(0);
46 }
47 
48 #undef __FUNC__
49 #define __FUNC__ "MatGetValues_MPIDense"
50 int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
51 {
52   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
53   int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;
54 
55   PetscFunctionBegin;
56   for ( i=0; i<m; i++ ) {
57     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
58     if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
59     if (idxm[i] >= rstart && idxm[i] < rend) {
60       row = idxm[i] - rstart;
61       for ( j=0; j<n; j++ ) {
62         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
63         if (idxn[j] >= mdn->N) {
64           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
65         }
66         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr);
67       }
68     } else {
69       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
70     }
71   }
72   PetscFunctionReturn(0);
73 }
74 
75 #undef __FUNC__
76 #define __FUNC__ "MatGetArray_MPIDense"
77 int MatGetArray_MPIDense(Mat A,Scalar **array)
78 {
79   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
80   int          ierr;
81 
82   PetscFunctionBegin;
83   ierr = MatGetArray(a->A,array); CHKERRQ(ierr);
84   PetscFunctionReturn(0);
85 }
86 
87 #undef __FUNC__
88 #define __FUNC__ "MatRestoreArray_MPIDense"
89 int MatRestoreArray_MPIDense(Mat A,Scalar **array)
90 {
91   PetscFunctionBegin;
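  /* nothing to do here: MatGetArray_MPIDense() returns a pointer directly
     into the local dense storage, so there is nothing to copy back */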
92   PetscFunctionReturn(0);
93 }
94 
95 #undef __FUNC__
96 #define __FUNC__ "MatAssemblyBegin_MPIDense"
97 int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
98 {
99   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
100   MPI_Comm     comm = mat->comm;
101   int          ierr,nstash,reallocs;
102   InsertMode   addv;
103 
104   PetscFunctionBegin;
105   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
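  /* INSERT_VALUES and ADD_VALUES are distinct bit values, so the bitwise OR
     across all processors equals (ADD_VALUES|INSERT_VALUES) exactly when some
     processors inserted while others added */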
106   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
107   if (addv == (ADD_VALUES|INSERT_VALUES)) {
108     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
109   }
110   mat->insertmode = addv; /* in case this processor had no cache */
111 
112   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners); CHKERRQ(ierr);
113   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs); CHKERRQ(ierr);
114   PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",
115            nstash,reallocs);
116   PetscFunctionReturn(0);
117 }
118 
119 #undef __FUNC__
120 #define __FUNC__ "MatAssemblyEnd_MPIDense"
121 int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
122 {
123   Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
124   int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
125   Scalar       *val;
126   InsertMode   addv=mat->insertmode;
127 
128   PetscFunctionBegin;
129   /*  wait on receives */
130   while (1) {
131     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg); CHKERRQ(ierr);
132     if (!flg) break;
133 
134     for ( i=0; i<n; ) {
135       /* Now identify the consecutive vals belonging to the same row */
136       for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
137       if (j < n) ncols = j-i;
138       else       ncols = n-i;
139       /* Now assemble all these values with a single function call */
140       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv); CHKERRQ(ierr);
141       i = j;
142     }
143   }
144   ierr = MatStashScatterEnd_Private(&mat->stash); CHKERRQ(ierr);
145 
146   ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
147   ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);
148 
149   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
150     ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 #undef __FUNC__
156 #define __FUNC__ "MatZeroEntries_MPIDense"
157 int MatZeroEntries_MPIDense(Mat A)
158 {
159   int          ierr;
160   Mat_MPIDense *l = (Mat_MPIDense *) A->data;
161 
162   PetscFunctionBegin;
163   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
164   PetscFunctionReturn(0);
165 }
166 
167 #undef __FUNC__
168 #define __FUNC__ "MatGetBlockSize_MPIDense"
169 int MatGetBlockSize_MPIDense(Mat A,int *bs)
170 {
171   PetscFunctionBegin;
172   *bs = 1;
173   PetscFunctionReturn(0);
174 }
175 
176 /* the code does not handle the diagonal entries correctly unless the
177    matrix is square and the column and row ownerships are identical.
178    This is a BUG. The only way to fix it seems to be to access
179    the local matrix (mdn->A) directly rather than going through the
180    MatZeroRows() call on it below.
181 */
182 #undef __FUNC__
183 #define __FUNC__ "MatZeroRows_MPIDense"
184 int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
185 {
186   Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
187   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
188   int            *procs,*nprocs,j,found,idx,nsends,*work;
189   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
190   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
191   int            *lens,imdex,*lrows,*values;
192   MPI_Comm       comm = A->comm;
193   MPI_Request    *send_waits,*recv_waits;
194   MPI_Status     recv_status,*send_status;
195   IS             istmp;
196 
197   PetscFunctionBegin;
198   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
199   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
200 
201   /*  first count number of contributors to each processor */
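  /* nprocs[j] counts how many of our rows belong to processor j, procs[j]
     (the second half of the same allocation) is 1 if we send anything to
     processor j, and owner[i] records which processor owns rows[i] */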
202   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
203   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
204   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
205   for ( i=0; i<N; i++ ) {
206     idx = rows[i];
207     found = 0;
208     for ( j=0; j<size; j++ ) {
209       if (idx >= owners[j] && idx < owners[j+1]) {
210         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
211       }
212     }
213     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
214   }
215   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
216 
217   /* inform other processors of number of messages and max length*/
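  /* after the first reduction work[rank] is the number of messages this
     processor will receive; after the second it is the length of the longest
     message destined for it */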
218   work   = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
219   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
220   nrecvs = work[rank];
221   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
222   nmax   = work[rank];
223   PetscFree(work);
224 
225   /* post receives:   */
226   rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
227   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
228   for ( i=0; i<nrecvs; i++ ) {
229     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
230   }
231 
232   /* do sends:
233       1) starts[i] gives the starting index in svalues for stuff going to
234          the ith processor
235   */
236   svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
237   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
238   starts     = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
239   starts[0]  = 0;
240   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
241   for ( i=0; i<N; i++ ) {
242     svalues[starts[owner[i]]++] = rows[i];
243   }
244   ISRestoreIndices(is,&rows);
245 
246   starts[0] = 0;
247   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
248   count = 0;
249   for ( i=0; i<size; i++ ) {
250     if (procs[i]) {
251       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
252     }
253   }
254   PetscFree(starts);
255 
256   base = owners[rank];
257 
258   /*  wait on receives */
259   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
260   source = lens + nrecvs;
261   count  = nrecvs; slen = 0;
262   while (count) {
263     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
264     /* unpack receives into our local space */
265     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
266     source[imdex]  = recv_status.MPI_SOURCE;
267     lens[imdex]  = n;
268     slen += n;
269     count--;
270   }
271   PetscFree(recv_waits);
272 
273   /* convert the received global row numbers into local row numbers */
274   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
275   count = 0;
276   for ( i=0; i<nrecvs; i++ ) {
277     values = rvalues + i*nmax;
278     for ( j=0; j<lens[i]; j++ ) {
279       lrows[count++] = values[j] - base;
280     }
281   }
282   PetscFree(rvalues); PetscFree(lens);
283   PetscFree(owner); PetscFree(nprocs);
284 
285   /* actually zap the local rows */
286   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
287   PLogObjectParent(A,istmp);
288   PetscFree(lrows);
289   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
290   ierr = ISDestroy(istmp); CHKERRQ(ierr);
291 
292   /* wait on sends */
293   if (nsends) {
294     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
295     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
296     PetscFree(send_status);
297   }
298   PetscFree(send_waits); PetscFree(svalues);
299 
300   PetscFunctionReturn(0);
301 }
302 
303 #undef __FUNC__
304 #define __FUNC__ "MatMult_MPIDense"
305 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
306 {
307   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
308   int          ierr;
309 
310   PetscFunctionBegin;
311   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
312   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
313   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
314   PetscFunctionReturn(0);
315 }
316 
317 #undef __FUNC__
318 #define __FUNC__ "MatMultAdd_MPIDense"
319 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
320 {
321   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
322   int          ierr;
323 
324   PetscFunctionBegin;
325   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
326   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
327   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
328   PetscFunctionReturn(0);
329 }
330 
331 #undef __FUNC__
332 #define __FUNC__ "MatMultTrans_MPIDense"
333 int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
334 {
335   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
336   int          ierr;
337   Scalar       zero = 0.0;
338 
339   PetscFunctionBegin;
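  /* y = A^T x: each processor forms A_local^T x_local in the work vector lvec
     (of global column length N); the reverse scatter then adds these partial
     sums into the distributed result yy, which is why yy is zeroed first */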
340   ierr = VecSet(&zero,yy); CHKERRQ(ierr);
341   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
342   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
343   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
344   PetscFunctionReturn(0);
345 }
346 
347 #undef __FUNC__
348 #define __FUNC__ "MatMultTransAdd_MPIDense"
349 int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
350 {
351   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
352   int          ierr;
353 
354   PetscFunctionBegin;
355   ierr = VecCopy(yy,zz); CHKERRQ(ierr);
356   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
357   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
358   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
359   PetscFunctionReturn(0);
360 }
361 
362 #undef __FUNC__
363 #define __FUNC__ "MatGetDiagonal_MPIDense"
364 int MatGetDiagonal_MPIDense(Mat A,Vec v)
365 {
366   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
367   Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
368   int          ierr, len, i, n, m = a->m, radd;
369   Scalar       *x, zero = 0.0;
370 
371   PetscFunctionBegin;
372   VecSet(&zero,v);
373   ierr = VecGetArray(v,&x); CHKERRQ(ierr);
374   ierr = VecGetSize(v,&n); CHKERRQ(ierr);
375   if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
376   len = PetscMin(aloc->m,aloc->n);
377   radd = a->rstart*m;
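  /* the local block is stored by columns with leading dimension m, so the
     global diagonal entry (rstart+i,rstart+i) is local entry (i,rstart+i),
     found at v[(rstart+i)*m + i] = v[radd + i*m + i] */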
378   for ( i=0; i<len; i++ ) {
379     x[i] = aloc->v[radd + i*m + i];
380   }
381   PetscFunctionReturn(0);
382 }
383 
384 #undef __FUNC__
385 #define __FUNC__ "MatDestroy_MPIDense"
386 int MatDestroy_MPIDense(Mat mat)
387 {
388   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
389   int          ierr;
390 
391   PetscFunctionBegin;
392   if (--mat->refct > 0) PetscFunctionReturn(0);
393 
394   if (mat->mapping) {
395     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
396   }
397   if (mat->bmapping) {
398     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
399   }
400 #if defined(USE_PETSC_LOG)
401   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
402 #endif
403   ierr = MatStashDestroy_Private(&mat->stash); CHKERRQ(ierr);
404   PetscFree(mdn->rowners);
405   ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
406   if (mdn->lvec)   VecDestroy(mdn->lvec);
407   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
408   if (mdn->factor) {
409     if (mdn->factor->temp)   PetscFree(mdn->factor->temp);
410     if (mdn->factor->tag)    PetscFree(mdn->factor->tag);
411     if (mdn->factor->pivots) PetscFree(mdn->factor->pivots);
412     PetscFree(mdn->factor);
413   }
414   PetscFree(mdn);
415   if (mat->rmap) {
416     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
417   }
418   if (mat->cmap) {
419     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
420   }
421   PLogObjectDestroy(mat);
422   PetscHeaderDestroy(mat);
423   PetscFunctionReturn(0);
424 }
425 
426 #undef __FUNC__
427 #define __FUNC__ "MatView_MPIDense_Binary"
428 static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
429 {
430   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
431   int          ierr;
432 
433   PetscFunctionBegin;
434   if (mdn->size == 1) {
435     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
436   }
437   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
438   PetscFunctionReturn(0);
439 }
440 
441 #undef __FUNC__
442 #define __FUNC__ "MatView_MPIDense_ASCII"
443 static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
444 {
445   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
446   int          ierr, format, size = mdn->size, rank = mdn->rank;
447   FILE         *fd;
448   ViewerType   vtype;
449 
450   PetscFunctionBegin;
451   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
452   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
453   ierr = ViewerGetFormat(viewer,&format); CHKERRQ(ierr);
454   if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
455     MatInfo info;
456     ierr = MatGetInfo(mat,MAT_LOCAL,&info); CHKERRQ(ierr);
457     PetscSequentialPhaseBegin(mat->comm,1);
458       fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
459          (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
460       fflush(fd);
461     PetscSequentialPhaseEnd(mat->comm,1);
462     ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
463     PetscFunctionReturn(0);
464   } else if (format == VIEWER_FORMAT_ASCII_INFO) {
465     PetscFunctionReturn(0);
466   }
467 
468   if (size == 1) {
469     ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
470   } else {
471     /* assemble the entire matrix onto first processor. */
472     Mat          A;
473     int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
474     Scalar       *vals;
475     Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;
476 
477     if (!rank) {
478       ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
479     } else {
480       ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
481     }
482     PLogObjectParent(mat,A);
483 
484     /* Copy the matrix ... This isn't the most efficient means,
485        but it's quick for now */
486     row = mdn->rstart; m = Amdn->m;
487     for ( i=0; i<m; i++ ) {
488       ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
489       ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
490       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
491       row++;
492     }
493 
494     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
495     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
496     if (!rank) {
497       ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
498     }
499     ierr = MatDestroy(A); CHKERRQ(ierr);
500   }
501   PetscFunctionReturn(0);
502 }
503 
504 #undef __FUNC__
505 #define __FUNC__ "MatView_MPIDense"
506 int MatView_MPIDense(Mat mat,Viewer viewer)
507 {
508   int          ierr;
509   ViewerType   vtype;
510 
  PetscFunctionBegin;
511   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
512   if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
513     ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
514   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
515     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
516   } else {
517     SETERRQ(1,1,"Viewer type not supported by PETSc object");
518   }
519   PetscFunctionReturn(0);
520 }
521 
522 #undef __FUNC__
523 #define __FUNC__ "MatGetInfo_MPIDense"
524 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
525 {
526   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
527   Mat          mdn = mat->A;
528   int          ierr;
529   double       isend[5], irecv[5];
530 
531   PetscFunctionBegin;
532   info->rows_global    = (double)mat->M;
533   info->columns_global = (double)mat->N;
534   info->rows_local     = (double)mat->m;
535   info->columns_local  = (double)mat->N;
536   info->block_size     = 1.0;
537   ierr = MatGetInfo(mdn,MAT_LOCAL,info); CHKERRQ(ierr);
538   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
539   isend[3] = info->memory;  isend[4] = info->mallocs;
540   if (flag == MAT_LOCAL) {
541     info->nz_used      = isend[0];
542     info->nz_allocated = isend[1];
543     info->nz_unneeded  = isend[2];
544     info->memory       = isend[3];
545     info->mallocs      = isend[4];
546   } else if (flag == MAT_GLOBAL_MAX) {
547     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
548     info->nz_used      = irecv[0];
549     info->nz_allocated = irecv[1];
550     info->nz_unneeded  = irecv[2];
551     info->memory       = irecv[3];
552     info->mallocs      = irecv[4];
553   } else if (flag == MAT_GLOBAL_SUM) {
554     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
555     info->nz_used      = irecv[0];
556     info->nz_allocated = irecv[1];
557     info->nz_unneeded  = irecv[2];
558     info->memory       = irecv[3];
559     info->mallocs      = irecv[4];
560   }
561   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
562   info->fill_ratio_needed = 0;
563   info->factor_mallocs    = 0;
564   PetscFunctionReturn(0);
565 }
566 
567 /* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
568    extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
569    extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
570    extern int MatSolve_MPIDense(Mat,Vec,Vec);
571    extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
572    extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
573    extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */
574 
575 #undef __FUNC__
576 #define __FUNC__ "MatSetOption_MPIDense"
577 int MatSetOption_MPIDense(Mat A,MatOption op)
578 {
579   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
580 
581   PetscFunctionBegin;
582   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
583       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
584       op == MAT_NEW_NONZERO_LOCATION_ERR ||
585       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
586       op == MAT_COLUMNS_SORTED ||
587       op == MAT_COLUMNS_UNSORTED) {
588         MatSetOption(a->A,op);
589   } else if (op == MAT_ROW_ORIENTED) {
590         a->roworiented = 1;
591         MatSetOption(a->A,op);
592   } else if (op == MAT_ROWS_SORTED ||
593              op == MAT_ROWS_UNSORTED ||
594              op == MAT_SYMMETRIC ||
595              op == MAT_STRUCTURALLY_SYMMETRIC ||
596              op == MAT_YES_NEW_DIAGONALS ||
597              op == MAT_USE_HASH_TABLE) {
598     PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
599   } else if (op == MAT_COLUMN_ORIENTED) {
600     a->roworiented = 0; MatSetOption(a->A,op);
601   } else if (op == MAT_NO_NEW_DIAGONALS) {
602     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
603   } else {
604     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
605   }
606   PetscFunctionReturn(0);
607 }
608 
609 #undef __FUNC__
610 #define __FUNC__ "MatGetSize_MPIDense"
611 int MatGetSize_MPIDense(Mat A,int *m,int *n)
612 {
613   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
614 
615   PetscFunctionBegin;
616   *m = mat->M; *n = mat->N;
617   PetscFunctionReturn(0);
618 }
619 
620 #undef __FUNC__
621 #define __FUNC__ "MatGetLocalSize_MPIDense"
622 int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
623 {
624   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
625 
626   PetscFunctionBegin;
627   *m = mat->m; *n = mat->N;
628   PetscFunctionReturn(0);
629 }
630 
631 #undef __FUNC__
632 #define __FUNC__ "MatGetOwnershipRange_MPIDense"
633 int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
634 {
635   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
636 
637   PetscFunctionBegin;
638   *m = mat->rstart; *n = mat->rend;
639   PetscFunctionReturn(0);
640 }
641 
642 #undef __FUNC__
643 #define __FUNC__ "MatGetRow_MPIDense"
644 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
645 {
646   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
647   int          lrow, rstart = mat->rstart, rend = mat->rend,ierr;
648 
649   PetscFunctionBegin;
650   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows")
651   lrow = row - rstart;
652   ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
653   PetscFunctionReturn(0);
654 }
655 
656 #undef __FUNC__
657 #define __FUNC__ "MatRestoreRow_MPIDense"
658 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
659 {
660   PetscFunctionBegin;
661   if (idx) PetscFree(*idx);
662   if (v) PetscFree(*v);
663   PetscFunctionReturn(0);
664 }
665 
666 #undef __FUNC__
667 #define __FUNC__ "MatNorm_MPIDense"
668 int MatNorm_MPIDense(Mat A,NormType type,double *norm)
669 {
670   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
671   Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
672   int          ierr, i, j;
673   double       sum = 0.0;
674   Scalar       *v = mat->v;
675 
676   PetscFunctionBegin;
677   if (mdn->size == 1) {
678     ierr =  MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
679   } else {
680     if (type == NORM_FROBENIUS) {
681       for (i=0; i<mat->n*mat->m; i++ ) {
682 #if defined(USE_PETSC_COMPLEX)
683         sum += PetscReal(PetscConj(*v)*(*v)); v++;
684 #else
685         sum += (*v)*(*v); v++;
686 #endif
687       }
688       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
689       *norm = sqrt(*norm);
690       PLogFlops(2*mat->n*mat->m);
691     } else if (type == NORM_1) {
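      /* the 1-norm is the largest column sum: accumulate |a_ij| over the local
         rows for every column, add the per-column sums across all processors,
         then take the maximum */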
692       double *tmp, *tmp2;
693       tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
694       tmp2 = tmp + mdn->N;
695       PetscMemzero(tmp,2*mdn->N*sizeof(double));
696       *norm = 0.0;
697       v = mat->v;
698       for ( j=0; j<mat->n; j++ ) {
699         for ( i=0; i<mat->m; i++ ) {
700           tmp[j] += PetscAbsScalar(*v);  v++;
701         }
702       }
703       ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
704       for ( j=0; j<mdn->N; j++ ) {
705         if (tmp2[j] > *norm) *norm = tmp2[j];
706       }
707       PetscFree(tmp);
708       PLogFlops(mat->n*mat->m);
709     } else if (type == NORM_INFINITY) { /* max row norm */
710       double ntemp;
711       ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
712       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
713     } else {
714       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
715     }
716   }
717   PetscFunctionReturn(0);
718 }
719 
720 #undef __FUNC__
721 #define __FUNC__ "MatTranspose_MPIDense"
722 int MatTranspose_MPIDense(Mat A,Mat *matout)
723 {
724   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
725   Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
726   Mat          B;
727   int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
728   int          j, i, ierr;
729   Scalar       *v;
730 
731   PetscFunctionBegin;
732   if (matout == PETSC_NULL && M != N) {
733     SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place");
734   }
735   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);
736 
737   m = Aloc->m; n = Aloc->n; v = Aloc->v;
738   rwork = (int *) PetscMalloc((m+1)*sizeof(int)); CHKPTRQ(rwork);
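  /* column j of the local block is contiguous (column-major storage) and
     becomes row j of B at global columns rstart .. rstart+m-1 */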
739   for ( j=0; j<n; j++ ) {
740     for (i=0; i<m; i++) rwork[i] = rstart + i;
741     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
742     v   += m;
743   }
744   PetscFree(rwork);
745   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
746   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
747   if (matout != PETSC_NULL) {
748     *matout = B;
749   } else {
750     PetscOps *Abops;
751     MatOps   Aops;
752 
753     /* This isn't really an in-place transpose; instead we free a's data structures and take over B's contents below */
754     PetscFree(a->rowners);
755     ierr = MatDestroy(a->A); CHKERRQ(ierr);
756     if (a->lvec) VecDestroy(a->lvec);
757     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
758     PetscFree(a);
759 
760     /*
761          This is horrible, horrible code. We need to keep the
762       A pointers for the bops and ops but copy everything
763       else from B.
764     */
765     Abops = A->bops;
766     Aops  = A->ops;
767     PetscMemcpy(A,B,sizeof(struct _p_Mat));
768     A->bops = Abops;
769     A->ops  = Aops;
770 
771     PetscHeaderDestroy(B);
772   }
773   PetscFunctionReturn(0);
774 }
775 
776 #include "pinclude/blaslapack.h"
777 #undef __FUNC__
778 #define __FUNC__ "MatScale_MPIDense"
779 int MatScale_MPIDense(Scalar *alpha,Mat inA)
780 {
781   Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
782   Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
783   int          one = 1, nz;
784 
785   PetscFunctionBegin;
786   nz = a->m*a->n;
787   BLscal_( &nz, alpha, a->v, &one );
788   PLogFlops(nz);
789   PetscFunctionReturn(0);
790 }
791 
792 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
793 extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);
794 
795 /* -------------------------------------------------------------------*/
796 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
797        MatGetRow_MPIDense,
798        MatRestoreRow_MPIDense,
799        MatMult_MPIDense,
800        MatMultAdd_MPIDense,
801        MatMultTrans_MPIDense,
802        MatMultTransAdd_MPIDense,
803        0,
804        0,
805        0,
806        0,
807        0,
808        0,
809        0,
810        MatTranspose_MPIDense,
811        MatGetInfo_MPIDense,0,
812        MatGetDiagonal_MPIDense,
813        0,
814        MatNorm_MPIDense,
815        MatAssemblyBegin_MPIDense,
816        MatAssemblyEnd_MPIDense,
817        0,
818        MatSetOption_MPIDense,
819        MatZeroEntries_MPIDense,
820        MatZeroRows_MPIDense,
821        0,
822        0,
823        0,
824        0,
825        MatGetSize_MPIDense,
826        MatGetLocalSize_MPIDense,
827        MatGetOwnershipRange_MPIDense,
828        0,
829        0,
830        MatGetArray_MPIDense,
831        MatRestoreArray_MPIDense,
832        MatDuplicate_MPIDense,
833        0,
834        0,
835        0,
836        0,
837        0,
838        MatGetSubMatrices_MPIDense,
839        0,
840        MatGetValues_MPIDense,
841        0,
842        0,
843        MatScale_MPIDense,
844        0,
845        0,
846        0,
847        MatGetBlockSize_MPIDense,
848        0,
849        0,
850        0,
851        0,
852        0,
853        0,
854        0,
855        0,
856        0,
857        0,
858        0,
859        0,
860        MatGetMaps_Petsc};
861 
862 #undef __FUNC__
863 #define __FUNC__ "MatCreateMPIDense"
864 /*@C
865    MatCreateMPIDense - Creates a parallel matrix in dense format.
866 
867    Collective on MPI_Comm
868 
869    Input Parameters:
870 +  comm - MPI communicator
871 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
872 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
873 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
874 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
875 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
876    to control all matrix memory allocation.
877 
878    Output Parameter:
879 .  A - the matrix
880 
881    Notes:
882    The dense format is fully compatible with standard Fortran 77
883    storage by columns.
884 
885    The data input variable is intended primarily for Fortran programmers
886    who wish to allocate their own matrix memory space.  Most users should
887    set data=PETSC_NULL.
888 
889    The user MUST specify either the local or global matrix dimensions
890    (possibly both).
891 
892    Currently, the only parallel dense matrix decomposition is by rows,
893    so that n=N and each submatrix owns all of the global columns.
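
   A minimal usage sketch that sets the locally owned diagonal entries
   (error checking with CHKERRQ(ierr) omitted; M and N here stand for the
   desired global dimensions):

      Mat    A;
      int    ierr, rstart, rend, i;
      Scalar one = 1.0;

      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);
      ierr = MatGetOwnershipRange(A,&rstart,&rend);
      for (i=rstart; i<rend && i<N; i++) {
        ierr = MatSetValues(A,1,&i,1,&i,&one,INSERT_VALUES);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
      ierr = MatDestroy(A);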
894 
895    Level: intermediate
896 
897 .keywords: matrix, dense, parallel
898 
899 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
900 @*/
901 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
902 {
903   Mat          mat;
904   Mat_MPIDense *a;
905   int          ierr, i,flg;
906 
907   PetscFunctionBegin;
908   /* Note:  For now, when data is specified above, this assumes the user correctly
909    allocates the local dense storage space.  We should add error checking. */
910 
911   *A = 0;
912   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView);
913   PLogObjectCreate(mat);
914   mat->data       = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
915   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
916   mat->ops->destroy    = MatDestroy_MPIDense;
917   mat->ops->view       = MatView_MPIDense;
918   mat->factor          = 0;
919   mat->mapping         = 0;
920 
921   a->factor       = 0;
922   mat->insertmode = NOT_SET_VALUES;
923   MPI_Comm_rank(comm,&a->rank);
924   MPI_Comm_size(comm,&a->size);
925 
926   ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
927 
928   /*
929      The computation of n below is wrong: n should represent the number of
930      local entries in the right (column) vector that the matrix acts on.
931   */
932 
933   /* each processor stores all the columns of its local rows */
934   if (N == PETSC_DECIDE) N = n;
935   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
936   /*  if (n != N) SETERRQ(PETSC_ERR_SUP,0,"For now, only n=N is supported"); */
937   a->N = mat->N = N;
938   a->M = mat->M = M;
939   a->m = mat->m = m;
940   a->n = mat->n = n;
941 
942   /* the information in the maps duplicates the information computed below, eventually
943      we should remove the duplicate information that is not contained in the maps */
944   ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr);
945   ierr = MapCreateMPI(comm,PETSC_DECIDE,N,&mat->cmap);CHKERRQ(ierr);
946 
947   /* build local table of row and column ownerships */
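  /* rowners[i] .. rowners[i+1] will hold the global row range owned by
     processor i (cowners likewise for the column ownership); both are built
     by gathering the local sizes and taking a running sum */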
948   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
949   a->cowners = a->rowners + a->size + 1;
950   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
951   ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
952   a->rowners[0] = 0;
953   for ( i=2; i<=a->size; i++ ) {
954     a->rowners[i] += a->rowners[i-1];
955   }
956   a->rstart = a->rowners[a->rank];
957   a->rend   = a->rowners[a->rank+1];
958   ierr      = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
959   a->cowners[0] = 0;
960   for ( i=2; i<=a->size; i++ ) {
961     a->cowners[i] += a->cowners[i-1];
962   }
963 
964   ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr);
965   PLogObjectParent(mat,a->A);
966 
967   /* build cache for off array entries formed */
968   ierr = MatStashCreate_Private(comm,1,&mat->stash); CHKERRQ(ierr);
969 
970   /* stuff used for matrix vector multiply */
971   a->lvec        = 0;
972   a->Mvctx       = 0;
973   a->roworiented = 1;
974 
975   *A = mat;
976   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
977   if (flg) {
978     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
979   }
980   PetscFunctionReturn(0);
981 }
982 
983 #undef __FUNC__
984 #define __FUNC__ "MatDuplicate_MPIDense"
985 static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
986 {
987   Mat          mat;
988   Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
989   int          ierr;
990   FactorCtx    *factor;
991 
992   PetscFunctionBegin;
993   *newmat       = 0;
994   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView);
995   PLogObjectCreate(mat);
996   mat->data      = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
997   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
998   mat->ops->destroy   = MatDestroy_MPIDense;
999   mat->ops->view      = MatView_MPIDense;
1000   mat->factor         = A->factor;
1001   mat->assembled      = PETSC_TRUE;
1002 
1003   a->m = mat->m = oldmat->m;
1004   a->n = mat->n = oldmat->n;
1005   a->M = mat->M = oldmat->M;
1006   a->N = mat->N = oldmat->N;
1007   if (oldmat->factor) {
1008     a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor);
1009     /* copy factor contents ... add this code! */
1010   } else a->factor = 0;
1011 
1012   a->rstart       = oldmat->rstart;
1013   a->rend         = oldmat->rend;
1014   a->size         = oldmat->size;
1015   a->rank         = oldmat->rank;
1016   mat->insertmode = NOT_SET_VALUES;
1017 
1018   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
1019   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1020   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1021   ierr = MatStashCreate_Private(A->comm,1,&mat->stash); CHKERRQ(ierr);
1022 
1023   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1024   PLogObjectParent(mat,a->lvec);
1025   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1026   PLogObjectParent(mat,a->Mvctx);
1027   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr);
1028   PLogObjectParent(mat,a->A);
1029   *newmat = mat;
1030   PetscFunctionReturn(0);
1031 }
1032 
1033 #include "sys.h"
1034 
1035 #undef __FUNC__
1036 #define __FUNC__ "MatLoad_MPIDense_DenseInFile"
1037 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
1038 {
1039   int        *rowners, i,size,rank,m,ierr,nz,j;
1040   Scalar     *array,*vals,*vals_ptr;
1041   MPI_Status status;
1042 
1043   PetscFunctionBegin;
1044   MPI_Comm_rank(comm,&rank);
1045   MPI_Comm_size(comm,&size);
1046 
1047   /* determine ownership of all rows */
1048   m          = M/size + ((M % size) > rank);
1049   rowners    = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1050   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1051   rowners[0] = 0;
1052   for ( i=2; i<=size; i++ ) {
1053     rowners[i] += rowners[i-1];
1054   }
1055 
1056   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1057   ierr = MatGetArray(*newmat,&array); CHKERRQ(ierr);
1058 
1059   if (!rank) {
1060     vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);
1061 
1062     /* read in my part of the matrix numerical values  */
1063     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR); CHKERRQ(ierr);
1064 
1065     /* insert into the local array row by row (the array is stored by columns, which is why the file cannot be read directly into it) */
1066     vals_ptr = vals;
1067     for ( i=0; i<m; i++ ) {
1068       for ( j=0; j<N; j++ ) {
1069         array[i + j*m] = *vals_ptr++;
1070       }
1071     }
1072 
1073     /* read in the other processors' parts and ship them out */
1074     for ( i=1; i<size; i++ ) {
1075       nz   = (rowners[i+1] - rowners[i])*N;
1076       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1077       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1078     }
1079   } else {
1080     /* receive numeric values */
1081     vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);
1082 
1083     /* receive message of values*/
1084     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1085 
1086     /* insert into the local array row by row (the array is stored by columns, which is why the message cannot be received directly into it) */
1087     vals_ptr = vals;
1088     for ( i=0; i<m; i++ ) {
1089       for ( j=0; j<N; j++ ) {
1090         array[i + j*m] = *vals_ptr++;
1091       }
1092     }
1093   }
1094   PetscFree(rowners);
1095   PetscFree(vals);
1096   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1097   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 
1102 #undef __FUNC__
1103 #define __FUNC__ "MatLoad_MPIDense"
1104 int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
1105 {
1106   Mat          A;
1107   Scalar       *vals,*svals;
1108   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1109   MPI_Status   status;
1110   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1111   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1112   int          tag = ((PetscObject)viewer)->tag;
1113   int          i, nz, ierr, j,rstart, rend, fd;
1114 
1115   PetscFunctionBegin;
1116   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1117   if (!rank) {
1118     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1119     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
1120     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1121   }
1122 
1123   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1124   M = header[1]; N = header[2]; nz = header[3];
1125 
1126   /*
1127        Handle case where matrix is stored on disk as a dense matrix
1128   */
1129   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1130     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1131     PetscFunctionReturn(0);
1132   }
1133 
1134   /* determine ownership of all rows */
1135   m          = M/size + ((M % size) > rank);
1136   rowners    = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1137   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1138   rowners[0] = 0;
1139   for ( i=2; i<=size; i++ ) {
1140     rowners[i] += rowners[i-1];
1141   }
1142   rstart = rowners[rank];
1143   rend   = rowners[rank+1];
1144 
1145   /* distribute row lengths to all processors */
1146   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1147   offlens = ourlens + (rend-rstart);
1148   if (!rank) {
1149     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1150     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
1151     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1152     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1153     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1154     PetscFree(sndcounts);
1155   } else {
1156     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
1157   }
1158 
1159   if (!rank) {
1160     /* calculate the number of nonzeros on each processor */
1161     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1162     PetscMemzero(procsnz,size*sizeof(int));
1163     for ( i=0; i<size; i++ ) {
1164       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1165         procsnz[i] += rowlengths[j];
1166       }
1167     }
1168     PetscFree(rowlengths);
1169 
1170     /* determine max buffer needed and allocate it */
1171     maxnz = 0;
1172     for ( i=0; i<size; i++ ) {
1173       maxnz = PetscMax(maxnz,procsnz[i]);
1174     }
1175     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1176 
1177     /* read in my part of the matrix column indices  */
1178     nz = procsnz[0];
1179     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1180     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
1181 
1182     /* read in everyone else's part and ship it off */
1183     for ( i=1; i<size; i++ ) {
1184       nz   = procsnz[i];
1185       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
1186       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1187     }
1188     PetscFree(cols);
1189   } else {
1190     /* determine buffer space needed for message */
1191     nz = 0;
1192     for ( i=0; i<m; i++ ) {
1193       nz += ourlens[i];
1194     }
1195     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1196 
1197     /* receive message of column indices*/
1198     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1199     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1200     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1201   }
1202 
1203   /* loop over local rows, determining number of off diagonal entries */
1204   PetscMemzero(offlens,m*sizeof(int));
1205   jj = 0;
1206   for ( i=0; i<m; i++ ) {
1207     for ( j=0; j<ourlens[i]; j++ ) {
1208       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1209       jj++;
1210     }
1211   }
1212 
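  /* for the dense format these lengths are not used for preallocation; ourlens
     is restored to the full row lengths right after the matrix is created and
     is only needed below to step through mycols and vals */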
1213   /* create our matrix */
1214   for ( i=0; i<m; i++ ) {
1215     ourlens[i] -= offlens[i];
1216   }
1217   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1218   A = *newmat;
1219   for ( i=0; i<m; i++ ) {
1220     ourlens[i] += offlens[i];
1221   }
1222 
1223   if (!rank) {
1224     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1225 
1226     /* read in my part of the matrix numerical values  */
1227     nz = procsnz[0];
1228     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1229 
1230     /* insert into matrix */
1231     jj      = rstart;
1232     smycols = mycols;
1233     svals   = vals;
1234     for ( i=0; i<m; i++ ) {
1235       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1236       smycols += ourlens[i];
1237       svals   += ourlens[i];
1238       jj++;
1239     }
1240 
1241     /* read in the other processors' parts and ship them out */
1242     for ( i=1; i<size; i++ ) {
1243       nz   = procsnz[i];
1244       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1245       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1246     }
1247     PetscFree(procsnz);
1248   } else {
1249     /* receive numeric values */
1250     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1251 
1252     /* receive message of values*/
1253     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1254     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1255     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1256 
1257     /* insert into matrix */
1258     jj      = rstart;
1259     smycols = mycols;
1260     svals   = vals;
1261     for ( i=0; i<m; i++ ) {
1262       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1263       smycols += ourlens[i];
1264       svals   += ourlens[i];
1265       jj++;
1266     }
1267   }
1268   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1269 
1270   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1271   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1272   PetscFunctionReturn(0);
1273 }
1274 
1275 
1276 
1277 
1278 
1279