1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpidense.c,v 1.116 1999/06/09 23:19:38 balay Exp balay $";
3 #endif
4 
5 /*
6    Basic functions for parallel dense matrices.
7 */
8 
9 #include "src/mat/impls/dense/mpi/mpidense.h"
10 #include "src/vec/vecimpl.h"
11 
12 extern int MatSetUpMultiply_MPIDense(Mat);
13 
14 #undef __FUNC__
15 #define __FUNC__ "MatSetValues_MPIDense"
16 int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
17 {
18   Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
19   int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
20   int          roworiented = A->roworiented;
21 
22   PetscFunctionBegin;
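     /* entries whose global row falls in [rstart,rend) go straight into the local
        sequential dense block A->A; rows owned by other processes are stashed and
        communicated during MatAssemblyBegin/End(), unless donotstash
        (MAT_IGNORE_OFF_PROC_ENTRIES) is set */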
23   for ( i=0; i<m; i++ ) {
24     if (idxm[i] < 0) continue;
25     if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
26     if (idxm[i] >= rstart && idxm[i] < rend) {
27       row = idxm[i] - rstart;
28       if (roworiented) {
29         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
30       } else {
31         for ( j=0; j<n; j++ ) {
32           if (idxn[j] < 0) continue;
33           if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
34           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
35         }
36       }
37     } else {
38       if ( !A->donotstash) {
39         if (roworiented) {
40           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
41         } else {
42           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
43         }
44       }
45     }
46   }
47   PetscFunctionReturn(0);
48 }
49 
50 #undef __FUNC__
51 #define __FUNC__ "MatGetValues_MPIDense"
52 int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
53 {
54   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
55   int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;
56 
57   PetscFunctionBegin;
58   for ( i=0; i<m; i++ ) {
59     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
60     if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
61     if (idxm[i] >= rstart && idxm[i] < rend) {
62       row = idxm[i] - rstart;
63       for ( j=0; j<n; j++ ) {
64         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
65         if (idxn[j] >= mdn->N) {
66           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
67         }
68         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
69       }
70     } else {
71       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
72     }
73   }
74   PetscFunctionReturn(0);
75 }
76 
77 #undef __FUNC__
78 #define __FUNC__ "MatGetArray_MPIDense"
79 int MatGetArray_MPIDense(Mat A,Scalar **array)
80 {
81   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
82   int          ierr;
83 
84   PetscFunctionBegin;
85   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
86   PetscFunctionReturn(0);
87 }
88 
89 #undef __FUNC__
90 #define __FUNC__ "MatRestoreArray_MPIDense"
91 int MatRestoreArray_MPIDense(Mat A,Scalar **array)
92 {
93   PetscFunctionBegin;
94   PetscFunctionReturn(0);
95 }
96 
97 #undef __FUNC__
98 #define __FUNC__ "MatAssemblyBegin_MPIDense"
99 int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
100 {
101   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
102   MPI_Comm     comm = mat->comm;
103   int          ierr,nstash,reallocs;
104   InsertMode   addv;
105 
106   PetscFunctionBegin;
107   /* make sure all processes use the same insert mode (INSERT_VALUES or ADD_VALUES) */
108   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
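      /* INSERT_VALUES and ADD_VALUES occupy different bits, so the bitwise OR across
         processes equals (ADD_VALUES|INSERT_VALUES) only if some processes inserted
         while others added */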
109   if (addv == (ADD_VALUES|INSERT_VALUES)) {
110     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
111   }
112   mat->insertmode = addv; /* in case this processor had no cache */
113 
114   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
115   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
116   PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",
117            nstash,reallocs);
118   PetscFunctionReturn(0);
119 }
120 
121 #undef __FUNC__
122 #define __FUNC__ "MatAssemblyEnd_MPIDense"
123 int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
124 {
125   Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
126   int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
127   Scalar       *val;
128   InsertMode   addv=mat->insertmode;
129 
130   PetscFunctionBegin;
131   /*  wait on receives */
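      /* each call to MatStashScatterGetMesg_Private() hands back one received message
         of stashed (row,col,value) entries set by another process; flg is zero once
         all messages have been processed */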
132   while (1) {
133     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
134     if (!flg) break;
135 
136     for ( i=0; i<n; ) {
137       /* Now identify the consecutive vals belonging to the same row */
138       for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
139       if (j < n) ncols = j-i;
140       else       ncols = n-i;
141       /* Now assemble all these values with a single function call */
142       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
143       i = j;
144     }
145   }
146   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
147 
148   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
149   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
150 
151   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
152     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
153   }
154   PetscFunctionReturn(0);
155 }
156 
157 #undef __FUNC__
158 #define __FUNC__ "MatZeroEntries_MPIDense"
159 int MatZeroEntries_MPIDense(Mat A)
160 {
161   int          ierr;
162   Mat_MPIDense *l = (Mat_MPIDense *) A->data;
163 
164   PetscFunctionBegin;
165   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
166   PetscFunctionReturn(0);
167 }
168 
169 #undef __FUNC__
170 #define __FUNC__ "MatGetBlockSize_MPIDense"
171 int MatGetBlockSize_MPIDense(Mat A,int *bs)
172 {
173   PetscFunctionBegin;
174   *bs = 1;
175   PetscFunctionReturn(0);
176 }
177 
178 /* The code does not handle the diagonal entries correctly unless the
179    matrix is square and the row and column ownerships are identical.
180    This is a BUG. The only way to fix it seems to be to access
181    mdn->A directly and not to go through the MatZeroRows()
182    routine.
183 */
184 #undef __FUNC__
185 #define __FUNC__ "MatZeroRows_MPIDense"
186 int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
187 {
188   Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
189   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
190   int            *procs,*nprocs,j,found,idx,nsends,*work;
191   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
192   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
193   int            *lens,imdex,*lrows,*values;
194   MPI_Comm       comm = A->comm;
195   MPI_Request    *send_waits,*recv_waits;
196   MPI_Status     recv_status,*send_status;
197   IS             istmp;
198 
199   PetscFunctionBegin;
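      /* each process determines which process owns every global row index it was given,
         ships those indices to their owners with nonblocking sends/receives, and finally
         zeros its locally owned rows via MatZeroRows() on the sequential block l->A */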
200   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
201   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
202 
203   /*  first count number of contributors to each processor */
204   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
205   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
206   procs  = nprocs + size;
207   owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
208   for ( i=0; i<N; i++ ) {
209     idx = rows[i];
210     found = 0;
211     for ( j=0; j<size; j++ ) {
212       if (idx >= owners[j] && idx < owners[j+1]) {
213         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
214       }
215     }
216     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
217   }
218   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
219 
220   /* inform other processors of number of messages and max length*/
221   work   = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
222   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
223   nrecvs = work[rank];
224   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
225   nmax   = work[rank];
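      /* after the first reduction work[rank] is the number of processes that will send
         to this process; after the second it is the length of the longest single
         message this process can receive */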
226   ierr = PetscFree(work);CHKERRQ(ierr);
227 
228   /* post receives:   */
229   rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
230   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
231   for ( i=0; i<nrecvs; i++ ) {
232     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
233   }
234 
235   /* do sends:
236       1) starts[i] gives the starting index in svalues for stuff going to
237          the ith processor
238   */
239   svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
240   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
241   starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
242   starts[0]  = 0;
243   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
244   for ( i=0; i<N; i++ ) {
245     svalues[starts[owner[i]]++] = rows[i];
246   }
247   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
248 
249   starts[0] = 0;
250   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
251   count = 0;
252   for ( i=0; i<size; i++ ) {
253     if (procs[i]) {
254       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
255     }
256   }
257   ierr = PetscFree(starts);CHKERRQ(ierr);
258 
259   base = owners[rank];
260 
261   /*  wait on receives */
262   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
263   source = lens + nrecvs;
264   count  = nrecvs; slen = 0;
265   while (count) {
266     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
267     /* unpack receives into our local space */
268     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
269     source[imdex]  = recv_status.MPI_SOURCE;
270     lens[imdex]  = n;
271     slen += n;
272     count--;
273   }
274   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
275 
276   /* move the data into the send scatter */
277   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
278   count = 0;
279   for ( i=0; i<nrecvs; i++ ) {
280     values = rvalues + i*nmax;
281     for ( j=0; j<lens[i]; j++ ) {
282       lrows[count++] = values[j] - base;
283     }
284   }
285   ierr = PetscFree(rvalues);CHKERRQ(ierr);
286   ierr = PetscFree(lens);CHKERRQ(ierr);
287   ierr = PetscFree(owner);CHKERRQ(ierr);
288   ierr = PetscFree(nprocs);CHKERRQ(ierr);
289 
290   /* actually zap the local rows */
291   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
292   PLogObjectParent(A,istmp);
293   ierr = PetscFree(lrows);CHKERRQ(ierr);
294   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
295   ierr = ISDestroy(istmp);CHKERRQ(ierr);
296 
297   /* wait on sends */
298   if (nsends) {
299     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
300     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
301     ierr = PetscFree(send_status);CHKERRQ(ierr);
302   }
303   ierr = PetscFree(send_waits);CHKERRQ(ierr);
304   ierr = PetscFree(svalues);CHKERRQ(ierr);
305 
306   PetscFunctionReturn(0);
307 }
308 
309 #undef __FUNC__
310 #define __FUNC__ "MatMult_MPIDense"
311 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
312 {
313   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
314   int          ierr;
315 
316   PetscFunctionBegin;
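      /* parallel y = A*x: scatter the distributed x into the local work vector lvec
         (length N, since each process stores all columns of its row block), then the
         local m x N dense block produces this process's rows of y */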
317   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
318   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
319   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
320   PetscFunctionReturn(0);
321 }
322 
323 #undef __FUNC__
324 #define __FUNC__ "MatMultAdd_MPIDense"
325 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
326 {
327   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
328   int          ierr;
329 
330   PetscFunctionBegin;
331   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
332   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
333   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
334   PetscFunctionReturn(0);
335 }
336 
337 #undef __FUNC__
338 #define __FUNC__ "MatMultTrans_MPIDense"
339 int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
340 {
341   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
342   int          ierr;
343   Scalar       zero = 0.0;
344 
345   PetscFunctionBegin;
346   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
347   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
348   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
349   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
350   PetscFunctionReturn(0);
351 }
352 
353 #undef __FUNC__
354 #define __FUNC__ "MatMultTransAdd_MPIDense"
355 int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
356 {
357   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
358   int          ierr;
359 
360   PetscFunctionBegin;
361   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
362   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
363   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
364   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
365   PetscFunctionReturn(0);
366 }
367 
368 #undef __FUNC__
369 #define __FUNC__ "MatGetDiagonal_MPIDense"
370 int MatGetDiagonal_MPIDense(Mat A,Vec v)
371 {
372   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
373   Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
374   int          ierr, len, i, n, m = a->m, radd;
375   Scalar       *x, zero = 0.0;
376 
377   PetscFunctionBegin;
378   ierr = VecSet(&zero,v);CHKERRQ(ierr);
379   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
380   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
381   if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
382   len = PetscMin(aloc->m,aloc->n);
383   radd = a->rstart*m;
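      /* the local block is stored column-major with m local rows; the global diagonal
         entry (rstart+i,rstart+i) sits in local row i, global column rstart+i, i.e. at
         offset (rstart+i)*m + i = radd + i*m + i */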
384   for ( i=0; i<len; i++ ) {
385     x[i] = aloc->v[radd + i*m + i];
386   }
387   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
388   PetscFunctionReturn(0);
389 }
390 
391 #undef __FUNC__
392 #define __FUNC__ "MatDestroy_MPIDense"
393 int MatDestroy_MPIDense(Mat mat)
394 {
395   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
396   int          ierr;
397 
398   PetscFunctionBegin;
399   if (--mat->refct > 0) PetscFunctionReturn(0);
400 
401   if (mat->mapping) {
402     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
403   }
404   if (mat->bmapping) {
405     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
406   }
407 #if defined(PETSC_USE_LOG)
408   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
409 #endif
410   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
411   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
412   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
413   if (mdn->lvec)   VecDestroy(mdn->lvec);
414   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
415   if (mdn->factor) {
416     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
417     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
418     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
419     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
420   }
421   ierr = PetscFree(mdn);CHKERRQ(ierr);
422   if (mat->rmap) {
423     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
424   }
425   if (mat->cmap) {
426     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
427   }
428   PLogObjectDestroy(mat);
429   PetscHeaderDestroy(mat);
430   PetscFunctionReturn(0);
431 }
432 
433 #undef __FUNC__
434 #define __FUNC__ "MatView_MPIDense_Binary"
435 static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
436 {
437   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
438   int          ierr;
439 
440   PetscFunctionBegin;
441   if (mdn->size == 1) {
442     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
443   }
444   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
445   PetscFunctionReturn(0);
446 }
447 
448 #undef __FUNC__
449 #define __FUNC__ "MatView_MPIDense_ASCII"
450 static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
451 {
452   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
453   int          ierr, format, size = mdn->size, rank = mdn->rank;
454   FILE         *fd;
455   ViewerType   vtype;
456 
457   PetscFunctionBegin;
458   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
459   ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
460   ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
461   if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
462     MatInfo info;
463     ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
464     PetscSequentialPhaseBegin(mat->comm,1);
465       fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
466          (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
467       fflush(fd);
468     PetscSequentialPhaseEnd(mat->comm,1);
469     ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
470     PetscFunctionReturn(0);
471   } else if (format == VIEWER_FORMAT_ASCII_INFO) {
472     PetscFunctionReturn(0);
473   }
474 
475   if (size == 1) {
476     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
477   } else {
478     /* assemble the entire matrix onto first processor. */
479     Mat          A;
480     int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
481     Scalar       *vals;
482     Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;
483 
484     if (!rank) {
485       ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
486     } else {
487       ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
488     }
489     PLogObjectParent(mat,A);
490 
491     /* Copy the matrix ... This isn't the most efficient means,
492        but it's quick for now */
493     row = mdn->rstart; m = Amdn->m;
494     for ( i=0; i<m; i++ ) {
495       ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
496       ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
497       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
498       row++;
499     }
500 
501     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
502     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
503     if (!rank) {
504       ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer);CHKERRQ(ierr);
505     }
506     ierr = MatDestroy(A);CHKERRQ(ierr);
507   }
508   PetscFunctionReturn(0);
509 }
510 
511 #undef __FUNC__
512 #define __FUNC__ "MatView_MPIDense"
513 int MatView_MPIDense(Mat mat,Viewer viewer)
514 {
515   int          ierr;
516   ViewerType   vtype;
517 
      PetscFunctionBegin;
518   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
519   if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
520     ierr = MatView_MPIDense_ASCII(mat,viewer);CHKERRQ(ierr);
521   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
522     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
523   } else {
524     SETERRQ(1,1,"Viewer type not supported by PETSc object");
525   }
526   PetscFunctionReturn(0);
527 }
528 
529 #undef __FUNC__
530 #define __FUNC__ "MatGetInfo_MPIDense"
531 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
532 {
533   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
534   Mat          mdn = mat->A;
535   int          ierr;
536   double       isend[5], irecv[5];
537 
538   PetscFunctionBegin;
539   info->rows_global    = (double)mat->M;
540   info->columns_global = (double)mat->N;
541   info->rows_local     = (double)mat->m;
542   info->columns_local  = (double)mat->N;
543   info->block_size     = 1.0;
544   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
545   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
546   isend[3] = info->memory;  isend[4] = info->mallocs;
547   if (flag == MAT_LOCAL) {
548     info->nz_used      = isend[0];
549     info->nz_allocated = isend[1];
550     info->nz_unneeded  = isend[2];
551     info->memory       = isend[3];
552     info->mallocs      = isend[4];
553   } else if (flag == MAT_GLOBAL_MAX) {
554     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
555     info->nz_used      = irecv[0];
556     info->nz_allocated = irecv[1];
557     info->nz_unneeded  = irecv[2];
558     info->memory       = irecv[3];
559     info->mallocs      = irecv[4];
560   } else if (flag == MAT_GLOBAL_SUM) {
561     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
562     info->nz_used      = irecv[0];
563     info->nz_allocated = irecv[1];
564     info->nz_unneeded  = irecv[2];
565     info->memory       = irecv[3];
566     info->mallocs      = irecv[4];
567   }
568   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
569   info->fill_ratio_needed = 0;
570   info->factor_mallocs    = 0;
571   PetscFunctionReturn(0);
572 }
573 
574 /* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
575    extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
576    extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
577    extern int MatSolve_MPIDense(Mat,Vec,Vec);
578    extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
579    extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
580    extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */
581 
582 #undef __FUNC__
583 #define __FUNC__ "MatSetOption_MPIDense"
584 int MatSetOption_MPIDense(Mat A,MatOption op)
585 {
586   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
587 
588   PetscFunctionBegin;
589   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
590       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
591       op == MAT_NEW_NONZERO_LOCATION_ERR ||
592       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
593       op == MAT_COLUMNS_SORTED ||
594       op == MAT_COLUMNS_UNSORTED) {
595         MatSetOption(a->A,op);
596   } else if (op == MAT_ROW_ORIENTED) {
597         a->roworiented = 1;
598         MatSetOption(a->A,op);
599   } else if (op == MAT_ROWS_SORTED ||
600              op == MAT_ROWS_UNSORTED ||
601              op == MAT_SYMMETRIC ||
602              op == MAT_STRUCTURALLY_SYMMETRIC ||
603              op == MAT_YES_NEW_DIAGONALS ||
604              op == MAT_USE_HASH_TABLE) {
605     PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
606   } else if (op == MAT_COLUMN_ORIENTED) {
607     a->roworiented = 0; MatSetOption(a->A,op);
608   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
609     a->donotstash = 1;
610   } else if (op == MAT_NO_NEW_DIAGONALS) {
611     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
612   } else {
613     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
614   }
615   PetscFunctionReturn(0);
616 }
617 
618 #undef __FUNC__
619 #define __FUNC__ "MatGetSize_MPIDense"
620 int MatGetSize_MPIDense(Mat A,int *m,int *n)
621 {
622   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
623 
624   PetscFunctionBegin;
625   *m = mat->M; *n = mat->N;
626   PetscFunctionReturn(0);
627 }
628 
629 #undef __FUNC__
630 #define __FUNC__ "MatGetLocalSize_MPIDense"
631 int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
632 {
633   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
634 
635   PetscFunctionBegin;
636   *m = mat->m; *n = mat->N;
637   PetscFunctionReturn(0);
638 }
639 
640 #undef __FUNC__
641 #define __FUNC__ "MatGetOwnershipRange_MPIDense"
642 int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
643 {
644   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
645 
646   PetscFunctionBegin;
647   *m = mat->rstart; *n = mat->rend;
648   PetscFunctionReturn(0);
649 }
650 
651 #undef __FUNC__
652 #define __FUNC__ "MatGetRow_MPIDense"
653 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
654 {
655   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
656   int          lrow, rstart = mat->rstart, rend = mat->rend,ierr;
657 
658   PetscFunctionBegin;
659   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows");
660   lrow = row - rstart;
661   ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
662   PetscFunctionReturn(0);
663 }
664 
665 #undef __FUNC__
666 #define __FUNC__ "MatRestoreRow_MPIDense"
667 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
668 {
669   int ierr;
670 
671   PetscFunctionBegin;
672   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
673   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
674   PetscFunctionReturn(0);
675 }
676 
677 #undef __FUNC__
678 #define __FUNC__ "MatNorm_MPIDense"
679 int MatNorm_MPIDense(Mat A,NormType type,double *norm)
680 {
681   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
682   Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
683   int          ierr, i, j;
684   double       sum = 0.0;
685   Scalar       *v = mat->v;
686 
687   PetscFunctionBegin;
688   if (mdn->size == 1) {
689     ierr =  MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
690   } else {
691     if (type == NORM_FROBENIUS) {
692       for (i=0; i<mat->n*mat->m; i++ ) {
693 #if defined(PETSC_USE_COMPLEX)
694         sum += PetscReal(PetscConj(*v)*(*v)); v++;
695 #else
696         sum += (*v)*(*v); v++;
697 #endif
698       }
699       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
700       *norm = sqrt(*norm);
701       PLogFlops(2*mat->n*mat->m);
702     } else if (type == NORM_1) {
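          /* 1-norm = max column sum: each process sums |a_ij| over its local rows for
             every global column, the per-column sums are added across processes, and
             the largest resulting column sum is taken */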
703       double *tmp, *tmp2;
704       tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) );CHKPTRQ(tmp);
705       tmp2 = tmp + mdn->N;
706       ierr = PetscMemzero(tmp,2*mdn->N*sizeof(double));CHKERRQ(ierr);
707       *norm = 0.0;
708       v = mat->v;
709       for ( j=0; j<mat->n; j++ ) {
710         for ( i=0; i<mat->m; i++ ) {
711           tmp[j] += PetscAbsScalar(*v);  v++;
712         }
713       }
714       ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
715       for ( j=0; j<mdn->N; j++ ) {
716         if (tmp2[j] > *norm) *norm = tmp2[j];
717       }
718       ierr = PetscFree(tmp);CHKERRQ(ierr);
719       PLogFlops(mat->n*mat->m);
720     } else if (type == NORM_INFINITY) { /* max row norm */
721       double ntemp;
722       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
723       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
724     } else {
725       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
726     }
727   }
728   PetscFunctionReturn(0);
729 }
730 
731 #undef __FUNC__
732 #define __FUNC__ "MatTranspose_MPIDense"
733 int MatTranspose_MPIDense(Mat A,Mat *matout)
734 {
735   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
736   Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
737   Mat          B;
738   int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
739   int          j, i, ierr;
740   Scalar       *v;
741 
742   PetscFunctionBegin;
743   if (matout == PETSC_NULL && M != N) {
744     SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place");
745   }
746   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);
747 
748   m = Aloc->m; n = Aloc->n; v = Aloc->v;
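      /* column j of the local m x N block holds A(rstart:rend-1,j); it forms the piece
         of global row j of the transpose lying in columns rstart..rend-1, so each
         column can be inserted with a single MatSetValues() call */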
749   rwork = (int *) PetscMalloc(m*sizeof(int));CHKPTRQ(rwork);
750   for ( j=0; j<n; j++ ) {
751     for (i=0; i<m; i++) rwork[i] = rstart + i;
752     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
753     v   += m;
754   }
755   ierr = PetscFree(rwork);CHKERRQ(ierr);
756   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
757   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
758   if (matout != PETSC_NULL) {
759     *matout = B;
760   } else {
761     PetscOps *Abops;
762     MatOps   Aops;
763 
764     /* This isn't really an in-place transpose: we free the data structures of a and take over B's */
765     ierr = PetscFree(a->rowners);CHKERRQ(ierr);
766     ierr = MatDestroy(a->A);CHKERRQ(ierr);
767     if (a->lvec) VecDestroy(a->lvec);
768     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
769     ierr = PetscFree(a);CHKERRQ(ierr);
770 
771     /*
772          This is horrible, horrible code. We need to keep the
773       A pointers for the bops and ops but copy everything
774       else from B.
775     */
776     Abops   = A->bops;
777     Aops    = A->ops;
778     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
779     A->bops = Abops;
780     A->ops  = Aops;
781 
782     PetscHeaderDestroy(B);
783   }
784   PetscFunctionReturn(0);
785 }
786 
787 #include "pinclude/blaslapack.h"
788 #undef __FUNC__
789 #define __FUNC__ "MatScale_MPIDense"
790 int MatScale_MPIDense(Scalar *alpha,Mat inA)
791 {
792   Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
793   Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
794   int          one = 1, nz;
795 
796   PetscFunctionBegin;
797   nz = a->m*a->n;
798   BLscal_( &nz, alpha, a->v, &one );
799   PLogFlops(nz);
800   PetscFunctionReturn(0);
801 }
802 
803 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
804 extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);
805 
806 /* -------------------------------------------------------------------*/
807 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
808        MatGetRow_MPIDense,
809        MatRestoreRow_MPIDense,
810        MatMult_MPIDense,
811        MatMultAdd_MPIDense,
812        MatMultTrans_MPIDense,
813        MatMultTransAdd_MPIDense,
814        0,
815        0,
816        0,
817        0,
818        0,
819        0,
820        0,
821        MatTranspose_MPIDense,
822        MatGetInfo_MPIDense,0,
823        MatGetDiagonal_MPIDense,
824        0,
825        MatNorm_MPIDense,
826        MatAssemblyBegin_MPIDense,
827        MatAssemblyEnd_MPIDense,
828        0,
829        MatSetOption_MPIDense,
830        MatZeroEntries_MPIDense,
831        MatZeroRows_MPIDense,
832        0,
833        0,
834        0,
835        0,
836        MatGetSize_MPIDense,
837        MatGetLocalSize_MPIDense,
838        MatGetOwnershipRange_MPIDense,
839        0,
840        0,
841        MatGetArray_MPIDense,
842        MatRestoreArray_MPIDense,
843        MatDuplicate_MPIDense,
844        0,
845        0,
846        0,
847        0,
848        0,
849        MatGetSubMatrices_MPIDense,
850        0,
851        MatGetValues_MPIDense,
852        0,
853        0,
854        MatScale_MPIDense,
855        0,
856        0,
857        0,
858        MatGetBlockSize_MPIDense,
859        0,
860        0,
861        0,
862        0,
863        0,
864        0,
865        0,
866        0,
867        0,
868        0,
869        0,
870        0,
871        MatGetMaps_Petsc};
872 
873 #undef __FUNC__
874 #define __FUNC__ "MatCreateMPIDense"
875 /*@C
876    MatCreateMPIDense - Creates a parallel matrix in dense format.
877 
878    Collective on MPI_Comm
879 
880    Input Parameters:
881 +  comm - MPI communicator
882 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
883 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
884 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
885 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
886 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
887    to control all matrix memory allocation.
888 
889    Output Parameter:
890 .  A - the matrix
891 
892    Notes:
893    The dense format is fully compatible with standard Fortran 77
894    storage by columns.
895 
896    The data input variable is intended primarily for Fortran programmers
897    who wish to allocate their own matrix memory space.  Most users should
898    set data=PETSC_NULL.
899 
900    The user MUST specify either the local or global matrix dimensions
901    (possibly both).
902 
903    Currently, the only parallel dense matrix decomposition is by rows,
904    so that n=N and each submatrix owns all of the global columns.
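
    Example usage (a minimal sketch; the 1000 by 100 global dimensions below are
    arbitrary):

       Mat A;
       ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,1000,100,PETSC_NULL,&A);CHKERRQ(ierr);
       /* set entries with MatSetValues(), then assemble */
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);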
905 
906    Level: intermediate
907 
908 .keywords: matrix, dense, parallel
909 
910 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
911 @*/
912 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
913 {
914   Mat          mat;
915   Mat_MPIDense *a;
916   int          ierr, i,flg;
917 
918   PetscFunctionBegin;
919   /* Note:  For now, when data is specified above, this assumes the user correctly
920    allocates the local dense storage space.  We should add error checking. */
921 
922   *A = 0;
923   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView);
924   PLogObjectCreate(mat);
925   mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
926   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
927   mat->ops->destroy = MatDestroy_MPIDense;
928   mat->ops->view    = MatView_MPIDense;
929   mat->factor       = 0;
930   mat->mapping      = 0;
931 
932   a->factor       = 0;
933   mat->insertmode = NOT_SET_VALUES;
934   ierr = MPI_Comm_rank(comm,&a->rank);CHKERRQ(ierr);
935   ierr = MPI_Comm_size(comm,&a->size);CHKERRQ(ierr);
936 
937   ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
938 
939   /*
940     The computation of n below is wrong; n should be the local length of the
941     right-hand (column) vector, i.e. of the vector that the matrix is applied to
942   */
943 
944   /* each process's row block stores all N columns */
945   if (N == PETSC_DECIDE) N = n;
946   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
947   /*  if (n != N) SETERRQ(PETSC_ERR_SUP,0,"For now, only n=N is supported"); */
948   a->N = mat->N = N;
949   a->M = mat->M = M;
950   a->m = mat->m = m;
951   a->n = mat->n = n;
952 
953   /* the information in the maps duplicates the information computed below, eventually
954      we should remove the duplicate information that is not contained in the maps */
955   ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr);
956   ierr = MapCreateMPI(comm,PETSC_DECIDE,N,&mat->cmap);CHKERRQ(ierr);
957 
958   /* build local table of row and column ownerships */
959   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
960   a->cowners = a->rowners + a->size + 1;
961   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
962   ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
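      /* rowners[1..size] first holds each process's local row count; the running sum
         below converts it to ownership offsets, so process p owns global rows
         rowners[p] through rowners[p+1]-1 */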
963   a->rowners[0] = 0;
964   for ( i=2; i<=a->size; i++ ) {
965     a->rowners[i] += a->rowners[i-1];
966   }
967   a->rstart = a->rowners[a->rank];
968   a->rend   = a->rowners[a->rank+1];
969   ierr      = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
970   a->cowners[0] = 0;
971   for ( i=2; i<=a->size; i++ ) {
972     a->cowners[i] += a->cowners[i-1];
973   }
974 
975   ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A);CHKERRQ(ierr);
976   PLogObjectParent(mat,a->A);
977 
978   /* build stash (cache) for off-processor entries */
979   a->donotstash = 0;
980   ierr = MatStashCreate_Private(comm,1,&mat->stash);CHKERRQ(ierr);
981 
982   /* stuff used for matrix vector multiply */
983   a->lvec        = 0;
984   a->Mvctx       = 0;
985   a->roworiented = 1;
986 
987   *A = mat;
988   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
989   if (flg) {
990     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
991   }
992   PetscFunctionReturn(0);
993 }
994 
995 #undef __FUNC__
996 #define __FUNC__ "MatDuplicate_MPIDense"
997 static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
998 {
999   Mat          mat;
1000   Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
1001   int          ierr;
1002   FactorCtx    *factor;
1003 
1004   PetscFunctionBegin;
1005   *newmat       = 0;
1006   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView);
1007   PLogObjectCreate(mat);
1008   mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
1009   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1010   mat->ops->destroy = MatDestroy_MPIDense;
1011   mat->ops->view    = MatView_MPIDense;
1012   mat->factor       = A->factor;
1013   mat->assembled    = PETSC_TRUE;
1014 
1015   a->m = mat->m = oldmat->m;
1016   a->n = mat->n = oldmat->n;
1017   a->M = mat->M = oldmat->M;
1018   a->N = mat->N = oldmat->N;
1019   if (oldmat->factor) {
1020     a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx));CHKPTRQ(factor);
1021     /* copy factor contents ... add this code! */
1022   } else a->factor = 0;
1023 
1024   a->rstart       = oldmat->rstart;
1025   a->rend         = oldmat->rend;
1026   a->size         = oldmat->size;
1027   a->rank         = oldmat->rank;
1028   mat->insertmode = NOT_SET_VALUES;
1029   a->donotstash   = oldmat->donotstash;
1030   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners);
1031   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1032   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
1033   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1034 
1035   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1036   PLogObjectParent(mat,a->lvec);
1037   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1038   PLogObjectParent(mat,a->Mvctx);
1039   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1040   PLogObjectParent(mat,a->A);
1041   *newmat = mat;
1042   PetscFunctionReturn(0);
1043 }
1044 
1045 #include "sys.h"
1046 
1047 #undef __FUNC__
1048 #define __FUNC__ "MatLoad_MPIDense_DenseInFile"
1049 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
1050 {
1051   int        *rowners, i,size,rank,m,ierr,nz,j;
1052   Scalar     *array,*vals,*vals_ptr;
1053   MPI_Status status;
1054 
1055   PetscFunctionBegin;
1056   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1057   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1058 
1059   /* determine ownership of all rows */
1060   m          = M/size + ((M % size) > rank);
1061   rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1062   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1063   rowners[0] = 0;
1064   for ( i=2; i<=size; i++ ) {
1065     rowners[i] += rowners[i-1];
1066   }
1067 
1068   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1069   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1070 
1071   if (!rank) {
1072     vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);
1073 
1074     /* read in my part of the matrix numerical values  */
1075     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1076 
1077     /* insert into the matrix row by row (this is why we cannot read directly into array) */
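         /* the local block "array" is stored column-major (Fortran order) with m local
            rows, while the file stores the values row by row, hence array[i + j*m] */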
1078     vals_ptr = vals;
1079     for ( i=0; i<m; i++ ) {
1080       for ( j=0; j<N; j++ ) {
1081         array[i + j*m] = *vals_ptr++;
1082       }
1083     }
1084 
1085     /* read in the other processors' parts and ship them out */
1086     for ( i=1; i<size; i++ ) {
1087       nz   = (rowners[i+1] - rowners[i])*N;
1088       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1089       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1090     }
1091   } else {
1092     /* receive numeric values */
1093     vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);
1094 
1095     /* receive message of values*/
1096     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1097 
1098     /* insert into the matrix row by row (this is why we cannot read directly into array) */
1099     vals_ptr = vals;
1100     for ( i=0; i<m; i++ ) {
1101       for ( j=0; j<N; j++ ) {
1102         array[i + j*m] = *vals_ptr++;
1103       }
1104     }
1105   }
1106   ierr = PetscFree(rowners);CHKERRQ(ierr);
1107   ierr = PetscFree(vals);CHKERRQ(ierr);
1108   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1109   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1110   PetscFunctionReturn(0);
1111 }
1112 
1113 
1114 #undef __FUNC__
1115 #define __FUNC__ "MatLoad_MPIDense"
1116 int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
1117 {
1118   Mat          A;
1119   Scalar       *vals,*svals;
1120   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1121   MPI_Status   status;
1122   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1123   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1124   int          tag = ((PetscObject)viewer)->tag;
1125   int          i, nz, ierr, j,rstart, rend, fd;
1126 
1127   PetscFunctionBegin;
1128   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1129   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1130   if (!rank) {
1131     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1132     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1133     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1134   }
1135 
1136   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1137   M = header[1]; N = header[2]; nz = header[3];
1138 
1139   /*
1140        Handle case where matrix is stored on disk as a dense matrix
1141   */
1142   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1143     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1144     PetscFunctionReturn(0);
1145   }
1146 
1147   /* determine ownership of all rows */
1148   m          = M/size + ((M % size) > rank);
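       /* split the M rows as evenly as possible: every process gets M/size rows and the
          first M%size processes get one extra */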
1149   rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1150   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1151   rowners[0] = 0;
1152   for ( i=2; i<=size; i++ ) {
1153     rowners[i] += rowners[i-1];
1154   }
1155   rstart = rowners[rank];
1156   rend   = rowners[rank+1];
1157 
1158   /* distribute row lengths to all processors */
1159   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
1160   offlens = ourlens + (rend-rstart);
1161   if (!rank) {
1162     rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
1163     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1164     sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
1165     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1166     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1167     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1168   } else {
1169     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
1170   }
1171 
1172   if (!rank) {
1173     /* calculate the number of nonzeros on each processor */
1174     procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
1175     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1176     for ( i=0; i<size; i++ ) {
1177       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1178         procsnz[i] += rowlengths[j];
1179       }
1180     }
1181     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1182 
1183     /* determine max buffer needed and allocate it */
1184     maxnz = 0;
1185     for ( i=0; i<size; i++ ) {
1186       maxnz = PetscMax(maxnz,procsnz[i]);
1187     }
1188     cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
1189 
1190     /* read in my part of the matrix column indices  */
1191     nz = procsnz[0];
1192     mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
1193     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1194 
1195     /* read in everyone else's part and ship it off */
1196     for ( i=1; i<size; i++ ) {
1197       nz   = procsnz[i];
1198       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1199       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1200     }
1201     ierr = PetscFree(cols);CHKERRQ(ierr);
1202   } else {
1203     /* determine buffer space needed for message */
1204     nz = 0;
1205     for ( i=0; i<m; i++ ) {
1206       nz += ourlens[i];
1207     }
1208     mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
1209 
1210     /* receive message of column indices*/
1211     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1212     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1213     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1214   }
1215 
1216   /* loop over local rows, determining number of off diagonal entries */
1217   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1218   jj = 0;
1219   for ( i=0; i<m; i++ ) {
1220     for ( j=0; j<ourlens[i]; j++ ) {
1221       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1222       jj++;
1223     }
1224   }
1225 
1226   /* create our matrix */
1227   for ( i=0; i<m; i++ ) {
1228     ourlens[i] -= offlens[i];
1229   }
1230   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1231   A = *newmat;
1232   for ( i=0; i<m; i++ ) {
1233     ourlens[i] += offlens[i];
1234   }
1235 
1236   if (!rank) {
1237     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);
1238 
1239     /* read in my part of the matrix numerical values  */
1240     nz = procsnz[0];
1241     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1242 
1243     /* insert into matrix */
1244     jj      = rstart;
1245     smycols = mycols;
1246     svals   = vals;
1247     for ( i=0; i<m; i++ ) {
1248       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1249       smycols += ourlens[i];
1250       svals   += ourlens[i];
1251       jj++;
1252     }
1253 
1254     /* read in the other processors' parts and ship them out */
1255     for ( i=1; i<size; i++ ) {
1256       nz   = procsnz[i];
1257       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1258       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1259     }
1260     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1261   } else {
1262     /* receive numeric values */
1263     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);
1264 
1265     /* receive message of values*/
1266     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1267     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1268     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1269 
1270     /* insert into matrix */
1271     jj      = rstart;
1272     smycols = mycols;
1273     svals   = vals;
1274     for ( i=0; i<m; i++ ) {
1275       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1276       smycols += ourlens[i];
1277       svals   += ourlens[i];
1278       jj++;
1279     }
1280   }
1281   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1282   ierr = PetscFree(vals);CHKERRQ(ierr);
1283   ierr = PetscFree(mycols);CHKERRQ(ierr);
1284   ierr = PetscFree(rowners);CHKERRQ(ierr);
1285 
1286   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1287   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1288   PetscFunctionReturn(0);
1289 }
1290 
1291 
1292 
1293 
1294 
1295