xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision cf23486d5c98bd4e00855d1002c8d80fadf08e91)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpidense.c,v 1.117 1999/06/30 23:50:57 balay Exp bsmith $";
3 #endif
4 
5 /*
6    Basic functions for parallel dense matrices.
7 */
8 
9 #include "src/mat/impls/dense/mpi/mpidense.h"
10 #include "src/vec/vecimpl.h"
11 
12 extern int MatSetUpMultiply_MPIDense(Mat);
13 
14 #undef __FUNC__
15 #define __FUNC__ "MatSetValues_MPIDense"
16 int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
17 {
18   Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
19   int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
20   int          roworiented = A->roworiented;
21 
22   PetscFunctionBegin;
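      /* Entries for rows owned by this process go directly into the local sequential
         block (A->A); entries for rows owned elsewhere are stashed (unless donotstash
         is set) and exchanged during assembly.  The values v are read row-major
         (v+i*n) when the matrix is row oriented and column-major (v+i+j*m) otherwise. */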
23   for ( i=0; i<m; i++ ) {
24     if (idxm[i] < 0) continue;
25     if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
26     if (idxm[i] >= rstart && idxm[i] < rend) {
27       row = idxm[i] - rstart;
28       if (roworiented) {
29         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
30       } else {
31         for ( j=0; j<n; j++ ) {
32           if (idxn[j] < 0) continue;
33           if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
34           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
35         }
36       }
37     } else {
38       if ( !A->donotstash) {
39         if (roworiented) {
40           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
41         } else {
42           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
43         }
44       }
45     }
46   }
47   PetscFunctionReturn(0);
48 }
49 
50 #undef __FUNC__
51 #define __FUNC__ "MatGetValues_MPIDense"
52 int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
53 {
54   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
55   int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;
56 
57   PetscFunctionBegin;
58   for ( i=0; i<m; i++ ) {
59     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
60     if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
61     if (idxm[i] >= rstart && idxm[i] < rend) {
62       row = idxm[i] - rstart;
63       for ( j=0; j<n; j++ ) {
64         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
65         if (idxn[j] >= mdn->N) {
66           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
67         }
68         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
69       }
70     } else {
71       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
72     }
73   }
74   PetscFunctionReturn(0);
75 }
76 
77 #undef __FUNC__
78 #define __FUNC__ "MatGetArray_MPIDense"
79 int MatGetArray_MPIDense(Mat A,Scalar **array)
80 {
81   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
82   int          ierr;
83 
84   PetscFunctionBegin;
85   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
86   PetscFunctionReturn(0);
87 }
88 
89 #undef __FUNC__
90 #define __FUNC__ "MatRestoreArray_MPIDense"
91 int MatRestoreArray_MPIDense(Mat A,Scalar **array)
92 {
93   PetscFunctionBegin;
94   PetscFunctionReturn(0);
95 }
96 
97 #undef __FUNC__
98 #define __FUNC__ "MatAssemblyBegin_MPIDense"
99 int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
100 {
101   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
102   MPI_Comm     comm = mat->comm;
103   int          ierr,nstash,reallocs;
104   InsertMode   addv;
105 
106   PetscFunctionBegin;
107   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES, not a mix */
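      /* INSERT_VALUES and ADD_VALUES are distinct bit values, so the bitwise OR over
         all processes equals (ADD_VALUES|INSERT_VALUES) only if some processes inserted
         while others added; that mixed case is rejected below. */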
108   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
109   if (addv == (ADD_VALUES|INSERT_VALUES)) {
110     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
111   }
112   mat->insertmode = addv; /* in case this processor had no cache */
113 
114   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
115   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
116   PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",
117            nstash,reallocs);
118   PetscFunctionReturn(0);
119 }
120 
121 #undef __FUNC__
122 #define __FUNC__ "MatAssemblyEnd_MPIDense"
123 int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
124 {
125   Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
126   int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
127   Scalar       *val;
128   InsertMode   addv=mat->insertmode;
129 
130   PetscFunctionBegin;
131   /*  wait on receives */
132   while (1) {
133     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
134     if (!flg) break;
135 
136     for ( i=0; i<n; ) {
137       /* Now identify the consecutive vals belonging to the same row */
138       for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
139       if (j < n) ncols = j-i;
140       else       ncols = n-i;
141       /* Now assemble all these values with a single function call */
142       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
143       i = j;
144     }
145   }
146   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
147 
148   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
149   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
150 
151   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
152     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
153   }
154   PetscFunctionReturn(0);
155 }
156 
157 #undef __FUNC__
158 #define __FUNC__ "MatZeroEntries_MPIDense"
159 int MatZeroEntries_MPIDense(Mat A)
160 {
161   int          ierr;
162   Mat_MPIDense *l = (Mat_MPIDense *) A->data;
163 
164   PetscFunctionBegin;
165   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
166   PetscFunctionReturn(0);
167 }
168 
169 #undef __FUNC__
170 #define __FUNC__ "MatGetBlockSize_MPIDense"
171 int MatGetBlockSize_MPIDense(Mat A,int *bs)
172 {
173   PetscFunctionBegin;
174   *bs = 1;
175   PetscFunctionReturn(0);
176 }
177 
178 /* the code does not handle the diagonal entries correctly unless the
179    matrix is square and the column and row ownerships are identical.
180    This is a BUG. The only way to fix it seems to be to access
181    the local matrix mdn->A directly and not go through the MatZeroRows()
182    routine.
183 */
184 #undef __FUNC__
185 #define __FUNC__ "MatZeroRows_MPIDense"
186 int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
187 {
188   Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
189   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
190   int            *procs,*nprocs,j,found,idx,nsends,*work;
191   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
192   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
193   int            *lens,imdex,*lrows,*values;
194   MPI_Comm       comm = A->comm;
195   MPI_Request    *send_waits,*recv_waits;
196   MPI_Status     recv_status,*send_status;
197   IS             istmp;
198 
199   PetscFunctionBegin;
200   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
201   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
202 
203   /*  first count number of contributors to each processor */
204   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
205   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
206   procs  = nprocs + size;
207   owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
208   for ( i=0; i<N; i++ ) {
209     idx = rows[i];
210     found = 0;
211     for ( j=0; j<size; j++ ) {
212       if (idx >= owners[j] && idx < owners[j+1]) {
213         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
214       }
215     }
216     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
217   }
218   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
219 
220   /* inform other processors of number of messages and max length*/
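      /* procs[] holds a 0/1 flag per destination and nprocs[] the number of rows
         destined for it; after the element-wise SUM and MAX reductions, work[rank]
         gives the number of processes that will send to this one (nrecvs) and the
         length of the longest incoming message (nmax), respectively. */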
221   work   = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
222   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
223   nrecvs = work[rank];
224   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
225   nmax   = work[rank];
226   ierr = PetscFree(work);CHKERRQ(ierr);
227 
228   /* post receives:   */
229   rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
230   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
231   for ( i=0; i<nrecvs; i++ ) {
232     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
233   }
234 
235   /* do sends:
236       1) starts[i] gives the starting index in svalues for stuff going to
237          the ith processor
238   */
239   svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
240   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
241   starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
242   starts[0]  = 0;
243   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
244   for ( i=0; i<N; i++ ) {
245     svalues[starts[owner[i]]++] = rows[i];
246   }
247   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
248 
249   starts[0] = 0;
250   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
251   count = 0;
252   for ( i=0; i<size; i++ ) {
253     if (procs[i]) {
254       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
255     }
256   }
257   ierr = PetscFree(starts);CHKERRQ(ierr);
258 
259   base = owners[rank];
260 
261   /*  wait on receives */
262   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
263   source = lens + nrecvs;
264   count  = nrecvs; slen = 0;
265   while (count) {
266     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
267     /* unpack receives into our local space */
268     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
269     source[imdex]  = recv_status.MPI_SOURCE;
270     lens[imdex]  = n;
271     slen += n;
272     count--;
273   }
274   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
275 
276   /* gather the received row indices into a local list */
277   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
278   count = 0;
279   for ( i=0; i<nrecvs; i++ ) {
280     values = rvalues + i*nmax;
281     for ( j=0; j<lens[i]; j++ ) {
282       lrows[count++] = values[j] - base;
283     }
284   }
285   ierr = PetscFree(rvalues);CHKERRQ(ierr);
286   ierr = PetscFree(lens);CHKERRQ(ierr);
287   ierr = PetscFree(owner);CHKERRQ(ierr);
288   ierr = PetscFree(nprocs);CHKERRQ(ierr);
289 
290   /* actually zap the local rows */
291   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
292   PLogObjectParent(A,istmp);
293   ierr = PetscFree(lrows);CHKERRQ(ierr);
294   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
295   ierr = ISDestroy(istmp);CHKERRQ(ierr);
296 
297   /* wait on sends */
298   if (nsends) {
299     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
300     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
301     ierr = PetscFree(send_status);CHKERRQ(ierr);
302   }
303   ierr = PetscFree(send_waits);CHKERRQ(ierr);
304   ierr = PetscFree(svalues);CHKERRQ(ierr);
305 
306   PetscFunctionReturn(0);
307 }
308 
309 #undef __FUNC__
310 #define __FUNC__ "MatMult_MPIDense"
311 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
312 {
313   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
314   int          ierr;
315 
316   PetscFunctionBegin;
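      /* The local block mdn->A stores all N global columns for this process's rows,
         so the distributed input xx is first gathered into the work vector mdn->lvec;
         the sequential multiply then produces this process's piece of yy. */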
317   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
318   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
319   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
320   PetscFunctionReturn(0);
321 }
322 
323 #undef __FUNC__
324 #define __FUNC__ "MatMultAdd_MPIDense"
325 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
326 {
327   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
328   int          ierr;
329 
330   PetscFunctionBegin;
331   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
332   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
333   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
334   PetscFunctionReturn(0);
335 }
336 
337 #undef __FUNC__
338 #define __FUNC__ "MatMultTrans_MPIDense"
339 int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
340 {
341   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
342   int          ierr;
343   Scalar       zero = 0.0;
344 
345   PetscFunctionBegin;
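      /* Transpose multiply: each process forms A_local^T * xx in the work vector
         a->lvec, and the partial sums are accumulated into the distributed yy by
         the reverse scatter with ADD_VALUES. */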
346   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
347   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
348   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
349   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
350   PetscFunctionReturn(0);
351 }
352 
353 #undef __FUNC__
354 #define __FUNC__ "MatMultTransAdd_MPIDense"
355 int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
356 {
357   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
358   int          ierr;
359 
360   PetscFunctionBegin;
361   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
362   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
363   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
364   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
365   PetscFunctionReturn(0);
366 }
367 
368 #undef __FUNC__
369 #define __FUNC__ "MatGetDiagonal_MPIDense"
370 int MatGetDiagonal_MPIDense(Mat A,Vec v)
371 {
372   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
373   Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
374   int          ierr, len, i, n, m = a->m, radd;
375   Scalar       *x, zero = 0.0;
376 
377   PetscFunctionBegin;
378   ierr = VecSet(&zero,v);CHKERRQ(ierr);
379   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
380   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
381   if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
382   len = PetscMin(aloc->m,aloc->n);
383   radd = a->rstart*m;
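      /* The local array is stored by column with leading dimension m, so the diagonal
         entry of global row rstart+i (local row i, global column rstart+i) is at
         offset (rstart+i)*m + i = radd + i*m + i. */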
384   for ( i=0; i<len; i++ ) {
385     x[i] = aloc->v[radd + i*m + i];
386   }
387   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
388   PetscFunctionReturn(0);
389 }
390 
391 #undef __FUNC__
392 #define __FUNC__ "MatDestroy_MPIDense"
393 int MatDestroy_MPIDense(Mat mat)
394 {
395   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
396   int          ierr;
397 
398   PetscFunctionBegin;
399 
400   if (mat->mapping) {
401     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
402   }
403   if (mat->bmapping) {
404     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
405   }
406 #if defined(PETSC_USE_LOG)
407   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
408 #endif
409   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
410   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
411   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
412   if (mdn->lvec)   VecDestroy(mdn->lvec);
413   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
414   if (mdn->factor) {
415     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
416     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
417     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
418     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
419   }
420   ierr = PetscFree(mdn);CHKERRQ(ierr);
421   if (mat->rmap) {
422     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
423   }
424   if (mat->cmap) {
425     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
426   }
427   PLogObjectDestroy(mat);
428   PetscHeaderDestroy(mat);
429   PetscFunctionReturn(0);
430 }
431 
432 #undef __FUNC__
433 #define __FUNC__ "MatView_MPIDense_Binary"
434 static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
435 {
436   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
437   int          ierr;
438 
439   PetscFunctionBegin;
440   if (mdn->size == 1) {
441     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
442   }
443   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
444   PetscFunctionReturn(0);
445 }
446 
447 #undef __FUNC__
448 #define __FUNC__ "MatView_MPIDense_ASCII"
449 static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
450 {
451   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
452   int          ierr, format, size = mdn->size, rank = mdn->rank;
453   FILE         *fd;
454   ViewerType   vtype;
455 
456   PetscFunctionBegin;
457   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
458   ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
459   ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
460   if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
461     MatInfo info;
462     ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
463     PetscSequentialPhaseBegin(mat->comm,1);
464       fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
465          (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
466       fflush(fd);
467     PetscSequentialPhaseEnd(mat->comm,1);
468     ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
469     PetscFunctionReturn(0);
470   } else if (format == VIEWER_FORMAT_ASCII_INFO) {
471     PetscFunctionReturn(0);
472   }
473 
474   if (size == 1) {
475     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
476   } else {
477     /* assemble the entire matrix onto the first processor. */
478     Mat          A;
479     int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
480     Scalar       *vals;
481     Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;
482 
483     if (!rank) {
484       ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
485     } else {
486       ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
487     }
488     PLogObjectParent(mat,A);
489 
490     /* Copy the matrix ... This isn't the most efficient means,
491        but it's quick for now */
492     row = mdn->rstart; m = Amdn->m;
493     for ( i=0; i<m; i++ ) {
494       ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
495       ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
496       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
497       row++;
498     }
499 
500     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
501     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
502     if (!rank) {
503       ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer);CHKERRQ(ierr);
504     }
505     ierr = MatDestroy(A);CHKERRQ(ierr);
506   }
507   PetscFunctionReturn(0);
508 }
509 
510 #undef __FUNC__
511 #define __FUNC__ "MatView_MPIDense"
512 int MatView_MPIDense(Mat mat,Viewer viewer)
513 {
514   int          ierr;
515   ViewerType   vtype;
516 
      PetscFunctionBegin;
517   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
518   if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
519     ierr = MatView_MPIDense_ASCII(mat,viewer);CHKERRQ(ierr);
520   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
521     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
522   } else {
523     SETERRQ(1,1,"Viewer type not supported by PETSc object");
524   }
525   PetscFunctionReturn(0);
526 }
527 
528 #undef __FUNC__
529 #define __FUNC__ "MatGetInfo_MPIDense"
530 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
531 {
532   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
533   Mat          mdn = mat->A;
534   int          ierr;
535   double       isend[5], irecv[5];
536 
537   PetscFunctionBegin;
538   info->rows_global    = (double)mat->M;
539   info->columns_global = (double)mat->N;
540   info->rows_local     = (double)mat->m;
541   info->columns_local  = (double)mat->N;
542   info->block_size     = 1.0;
543   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
544   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
545   isend[3] = info->memory;  isend[4] = info->mallocs;
546   if (flag == MAT_LOCAL) {
547     info->nz_used      = isend[0];
548     info->nz_allocated = isend[1];
549     info->nz_unneeded  = isend[2];
550     info->memory       = isend[3];
551     info->mallocs      = isend[4];
552   } else if (flag == MAT_GLOBAL_MAX) {
553     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
554     info->nz_used      = irecv[0];
555     info->nz_allocated = irecv[1];
556     info->nz_unneeded  = irecv[2];
557     info->memory       = irecv[3];
558     info->mallocs      = irecv[4];
559   } else if (flag == MAT_GLOBAL_SUM) {
560     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
561     info->nz_used      = irecv[0];
562     info->nz_allocated = irecv[1];
563     info->nz_unneeded  = irecv[2];
564     info->memory       = irecv[3];
565     info->mallocs      = irecv[4];
566   }
567   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
568   info->fill_ratio_needed = 0;
569   info->factor_mallocs    = 0;
570   PetscFunctionReturn(0);
571 }
572 
573 /* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
574    extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
575    extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
576    extern int MatSolve_MPIDense(Mat,Vec,Vec);
577    extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
578    extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
579    extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */
580 
581 #undef __FUNC__
582 #define __FUNC__ "MatSetOption_MPIDense"
583 int MatSetOption_MPIDense(Mat A,MatOption op)
584 {
585   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
586 
587   PetscFunctionBegin;
588   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
589       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
590       op == MAT_NEW_NONZERO_LOCATION_ERR ||
591       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
592       op == MAT_COLUMNS_SORTED ||
593       op == MAT_COLUMNS_UNSORTED) {
594         MatSetOption(a->A,op);
595   } else if (op == MAT_ROW_ORIENTED) {
596         a->roworiented = 1;
597         MatSetOption(a->A,op);
598   } else if (op == MAT_ROWS_SORTED ||
599              op == MAT_ROWS_UNSORTED ||
600              op == MAT_SYMMETRIC ||
601              op == MAT_STRUCTURALLY_SYMMETRIC ||
602              op == MAT_YES_NEW_DIAGONALS ||
603              op == MAT_USE_HASH_TABLE) {
604     PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
605   } else if (op == MAT_COLUMN_ORIENTED) {
606     a->roworiented = 0; MatSetOption(a->A,op);
607   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
608     a->donotstash = 1;
609   } else if (op == MAT_NO_NEW_DIAGONALS) {
610     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
611   } else {
612     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
613   }
614   PetscFunctionReturn(0);
615 }
616 
617 #undef __FUNC__
618 #define __FUNC__ "MatGetSize_MPIDense"
619 int MatGetSize_MPIDense(Mat A,int *m,int *n)
620 {
621   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
622 
623   PetscFunctionBegin;
624   *m = mat->M; *n = mat->N;
625   PetscFunctionReturn(0);
626 }
627 
628 #undef __FUNC__
629 #define __FUNC__ "MatGetLocalSize_MPIDense"
630 int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
631 {
632   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
633 
634   PetscFunctionBegin;
635   *m = mat->m; *n = mat->N;
636   PetscFunctionReturn(0);
637 }
638 
639 #undef __FUNC__
640 #define __FUNC__ "MatGetOwnershipRange_MPIDense"
641 int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
642 {
643   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
644 
645   PetscFunctionBegin;
646   *m = mat->rstart; *n = mat->rend;
647   PetscFunctionReturn(0);
648 }
649 
650 #undef __FUNC__
651 #define __FUNC__ "MatGetRow_MPIDense"
652 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
653 {
654   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
655   int          lrow, rstart = mat->rstart, rend = mat->rend,ierr;
656 
657   PetscFunctionBegin;
658   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows");
659   lrow = row - rstart;
660   ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
661   PetscFunctionReturn(0);
662 }
663 
664 #undef __FUNC__
665 #define __FUNC__ "MatRestoreRow_MPIDense"
666 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
667 {
668   int ierr;
669 
670   PetscFunctionBegin;
671   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
672   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
673   PetscFunctionReturn(0);
674 }
675 
676 #undef __FUNC__
677 #define __FUNC__ "MatNorm_MPIDense"
678 int MatNorm_MPIDense(Mat A,NormType type,double *norm)
679 {
680   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
681   Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
682   int          ierr, i, j;
683   double       sum = 0.0;
684   Scalar       *v = mat->v;
685 
686   PetscFunctionBegin;
687   if (mdn->size == 1) {
688     ierr =  MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
689   } else {
690     if (type == NORM_FROBENIUS) {
691       for (i=0; i<mat->n*mat->m; i++ ) {
692 #if defined(PETSC_USE_COMPLEX)
693         sum += PetscReal(PetscConj(*v)*(*v)); v++;
694 #else
695         sum += (*v)*(*v); v++;
696 #endif
697       }
698       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
699       *norm = sqrt(*norm);
700       PLogFlops(2*mat->n*mat->m);
701     } else if (type == NORM_1) {
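          /* 1-norm = maximum column sum: form the local column sums, add them across
             all processes, then take the largest. */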
702       double *tmp, *tmp2;
703       tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) );CHKPTRQ(tmp);
704       tmp2 = tmp + mdn->N;
705       ierr = PetscMemzero(tmp,2*mdn->N*sizeof(double));CHKERRQ(ierr);
706       *norm = 0.0;
707       v = mat->v;
708       for ( j=0; j<mat->n; j++ ) {
709         for ( i=0; i<mat->m; i++ ) {
710           tmp[j] += PetscAbsScalar(*v);  v++;
711         }
712       }
713       ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
714       for ( j=0; j<mdn->N; j++ ) {
715         if (tmp2[j] > *norm) *norm = tmp2[j];
716       }
717       ierr = PetscFree(tmp);CHKERRQ(ierr);
718       PLogFlops(mat->n*mat->m);
719     } else if (type == NORM_INFINITY) { /* max row norm */
720       double ntemp;
721       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
722       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
723     } else {
724       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
725     }
726   }
727   PetscFunctionReturn(0);
728 }
729 
730 #undef __FUNC__
731 #define __FUNC__ "MatTranspose_MPIDense"
732 int MatTranspose_MPIDense(Mat A,Mat *matout)
733 {
734   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
735   Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
736   Mat          B;
737   int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
738   int          j, i, ierr;
739   Scalar       *v;
740 
741   PetscFunctionBegin;
742   if (matout == PETSC_NULL && M != N) {
743     SETERRQ(PETSC_ERR_SUP,0,"In-place transpose supported only for square matrices");
744   }
745   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);
746 
747   m = Aloc->m; n = Aloc->n; v = Aloc->v;
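      /* The local block is stored column by column, so the m contiguous entries of
         column j become row j of the transpose, placed in global columns
         rstart .. rstart+m-1 of B. */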
748   rwork = (int *) PetscMalloc(m*sizeof(int));CHKPTRQ(rwork);
749   for ( j=0; j<n; j++ ) {
750     for (i=0; i<m; i++) rwork[i] = rstart + i;
751     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
752     v   += m;
753   }
754   ierr = PetscFree(rwork);CHKERRQ(ierr);
755   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
756   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
757   if (matout != PETSC_NULL) {
758     *matout = B;
759   } else {
760     PetscOps *Abops;
761     MatOps   Aops;
762 
763     /* This isn't really an in-place transpose: free a's data structures, then copy B's header over A below */
764     ierr = PetscFree(a->rowners);CHKERRQ(ierr);
765     ierr = MatDestroy(a->A);CHKERRQ(ierr);
766     if (a->lvec) VecDestroy(a->lvec);
767     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
768     ierr = PetscFree(a);CHKERRQ(ierr);
769 
770     /*
771          This is horrible, horrible code. We need to keep the
772       A pointers for the bops and ops but copy everything
773       else from B.
774     */
775     Abops   = A->bops;
776     Aops    = A->ops;
777     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
778     A->bops = Abops;
779     A->ops  = Aops;
780 
781     PetscHeaderDestroy(B);
782   }
783   PetscFunctionReturn(0);
784 }
785 
786 #include "pinclude/blaslapack.h"
787 #undef __FUNC__
788 #define __FUNC__ "MatScale_MPIDense"
789 int MatScale_MPIDense(Scalar *alpha,Mat inA)
790 {
791   Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
792   Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
793   int          one = 1, nz;
794 
795   PetscFunctionBegin;
796   nz = a->m*a->n;
797   BLscal_( &nz, alpha, a->v, &one );
798   PLogFlops(nz);
799   PetscFunctionReturn(0);
800 }
801 
802 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
803 extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);
804 
805 /* -------------------------------------------------------------------*/
806 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
807        MatGetRow_MPIDense,
808        MatRestoreRow_MPIDense,
809        MatMult_MPIDense,
810        MatMultAdd_MPIDense,
811        MatMultTrans_MPIDense,
812        MatMultTransAdd_MPIDense,
813        0,
814        0,
815        0,
816        0,
817        0,
818        0,
819        0,
820        MatTranspose_MPIDense,
821        MatGetInfo_MPIDense,0,
822        MatGetDiagonal_MPIDense,
823        0,
824        MatNorm_MPIDense,
825        MatAssemblyBegin_MPIDense,
826        MatAssemblyEnd_MPIDense,
827        0,
828        MatSetOption_MPIDense,
829        MatZeroEntries_MPIDense,
830        MatZeroRows_MPIDense,
831        0,
832        0,
833        0,
834        0,
835        MatGetSize_MPIDense,
836        MatGetLocalSize_MPIDense,
837        MatGetOwnershipRange_MPIDense,
838        0,
839        0,
840        MatGetArray_MPIDense,
841        MatRestoreArray_MPIDense,
842        MatDuplicate_MPIDense,
843        0,
844        0,
845        0,
846        0,
847        0,
848        MatGetSubMatrices_MPIDense,
849        0,
850        MatGetValues_MPIDense,
851        0,
852        0,
853        MatScale_MPIDense,
854        0,
855        0,
856        0,
857        MatGetBlockSize_MPIDense,
858        0,
859        0,
860        0,
861        0,
862        0,
863        0,
864        0,
865        0,
866        0,
867        0,
868        0,
869        0,
870        MatGetMaps_Petsc};
871 
872 #undef __FUNC__
873 #define __FUNC__ "MatCreateMPIDense"
874 /*@C
875    MatCreateMPIDense - Creates a parallel matrix in dense format.
876 
877    Collective on MPI_Comm
878 
879    Input Parameters:
880 +  comm - MPI communicator
881 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
882 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
883 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
884 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
885 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
886    to control all matrix memory allocation.
887 
888    Output Parameter:
889 .  A - the matrix
890 
891    Notes:
892    The dense format is fully compatible with standard Fortran 77
893    storage by columns.
894 
895    The data input variable is intended primarily for Fortran programmers
896    who wish to allocate their own matrix memory space.  Most users should
897    set data=PETSC_NULL.
898 
899    The user MUST specify either the local or global matrix dimensions
900    (possibly both).
901 
902    Currently, the only parallel dense matrix decomposition is by rows,
903    so that n=N and each submatrix owns all of the global columns.
904 
905    Level: intermediate
906 
907 .keywords: matrix, dense, parallel
908 
909 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
910 @*/
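/*
   A minimal usage sketch (not part of the library documentation; error handling
   abbreviated, sizes and values chosen arbitrarily), inside a program that has
   already called PetscInitialize():

       Mat    A;
       Scalar v = 1.0;
       int    ierr, row = 0, col = 0;

       ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
                                100,100,PETSC_NULL,&A);CHKERRQ(ierr);
       ierr = MatSetValues(A,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatDestroy(A);CHKERRQ(ierr);
*/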
911 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
912 {
913   Mat          mat;
914   Mat_MPIDense *a;
915   int          ierr, i,flg;
916 
917   PetscFunctionBegin;
918   /* Note:  For now, when data is specified above, this assumes the user correctly
919    allocates the local dense storage space.  We should add error checking. */
920 
921   *A = 0;
922   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView);
923   PLogObjectCreate(mat);
924   mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
925   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
926   mat->ops->destroy = MatDestroy_MPIDense;
927   mat->ops->view    = MatView_MPIDense;
928   mat->factor       = 0;
929   mat->mapping      = 0;
930 
931   a->factor       = 0;
932   mat->insertmode = NOT_SET_VALUES;
933   ierr = MPI_Comm_rank(comm,&a->rank);CHKERRQ(ierr);
934   ierr = MPI_Comm_size(comm,&a->size);CHKERRQ(ierr);
935 
936   ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
937 
938   /*
939      The computation of n below is wrong; n should represent the number of
940      local rows of the right-hand (column) vector.
941   */
942 
943   /* each row stores all columns */
944   if (N == PETSC_DECIDE) N = n;
945   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
946   /*  if (n != N) SETERRQ(PETSC_ERR_SUP,0,"For now, only n=N is supported"); */
947   a->N = mat->N = N;
948   a->M = mat->M = M;
949   a->m = mat->m = m;
950   a->n = mat->n = n;
951 
952   /* the information in the maps duplicates the information computed below, eventually
953      we should remove the duplicate information that is not contained in the maps */
954   ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr);
955   ierr = MapCreateMPI(comm,PETSC_DECIDE,N,&mat->cmap);CHKERRQ(ierr);
956 
957   /* build local table of row and column ownerships */
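      /* after the prefix sums below, rowners[p] .. rowners[p+1]-1 are the global rows
         owned by process p; cowners gives the analogous ranges for the column
         (right-hand vector) layout */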
958   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
959   a->cowners = a->rowners + a->size + 1;
960   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
961   ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
962   a->rowners[0] = 0;
963   for ( i=2; i<=a->size; i++ ) {
964     a->rowners[i] += a->rowners[i-1];
965   }
966   a->rstart = a->rowners[a->rank];
967   a->rend   = a->rowners[a->rank+1];
968   ierr      = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
969   a->cowners[0] = 0;
970   for ( i=2; i<=a->size; i++ ) {
971     a->cowners[i] += a->cowners[i-1];
972   }
973 
974   ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A);CHKERRQ(ierr);
975   PLogObjectParent(mat,a->A);
976 
977   /* build cache (stash) for off-processor entries */
978   a->donotstash = 0;
979   ierr = MatStashCreate_Private(comm,1,&mat->stash);CHKERRQ(ierr);
980 
981   /* stuff used for matrix vector multiply */
982   a->lvec        = 0;
983   a->Mvctx       = 0;
984   a->roworiented = 1;
985 
986   *A = mat;
987   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
988   if (flg) {
989     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
990   }
991   PetscFunctionReturn(0);
992 }
993 
994 #undef __FUNC__
995 #define __FUNC__ "MatDuplicate_MPIDense"
996 static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
997 {
998   Mat          mat;
999   Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
1000   int          ierr;
1001   FactorCtx    *factor;
1002 
1003   PetscFunctionBegin;
1004   *newmat       = 0;
1005   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView);
1006   PLogObjectCreate(mat);
1007   mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
1008   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1009   mat->ops->destroy = MatDestroy_MPIDense;
1010   mat->ops->view    = MatView_MPIDense;
1011   mat->factor       = A->factor;
1012   mat->assembled    = PETSC_TRUE;
1013 
1014   a->m = mat->m = oldmat->m;
1015   a->n = mat->n = oldmat->n;
1016   a->M = mat->M = oldmat->M;
1017   a->N = mat->N = oldmat->N;
1018   if (oldmat->factor) {
1019     a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx));CHKPTRQ(factor);
1020     /* copy factor contents ... add this code! */
1021   } else a->factor = 0;
1022 
1023   a->rstart       = oldmat->rstart;
1024   a->rend         = oldmat->rend;
1025   a->size         = oldmat->size;
1026   a->rank         = oldmat->rank;
1027   mat->insertmode = NOT_SET_VALUES;
1028   a->donotstash   = oldmat->donotstash;
1029   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners);
1030   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1031   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
1032   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1033 
1034   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1035   PLogObjectParent(mat,a->lvec);
1036   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1037   PLogObjectParent(mat,a->Mvctx);
1038   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1039   PLogObjectParent(mat,a->A);
1040   *newmat = mat;
1041   PetscFunctionReturn(0);
1042 }
1043 
1044 #include "sys.h"
1045 
1046 #undef __FUNC__
1047 #define __FUNC__ "MatLoad_MPIDense_DenseInFile"
1048 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
1049 {
1050   int        *rowners, i,size,rank,m,ierr,nz,j;
1051   Scalar     *array,*vals,*vals_ptr;
1052   MPI_Status status;
1053 
1054   PetscFunctionBegin;
1055   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1056   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1057 
1058   /* determine ownership of all rows */
1059   m          = M/size + ((M % size) > rank);
1060   rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1061   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1062   rowners[0] = 0;
1063   for ( i=2; i<=size; i++ ) {
1064     rowners[i] += rowners[i-1];
1065   }
1066 
1067   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1068   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1069 
1070   if (!rank) {
1071     vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);
1072 
1073     /* read in my part of the matrix numerical values  */
1074     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1075 
1076     /* insert into the matrix row by row (the local array is stored by column, so the row-ordered file data cannot be read directly into it) */
1077     vals_ptr = vals;
1078     for ( i=0; i<m; i++ ) {
1079       for ( j=0; j<N; j++ ) {
1080         array[i + j*m] = *vals_ptr++;
1081       }
1082     }
1083 
1084     /* read in other processors and ship out */
1085     for ( i=1; i<size; i++ ) {
1086       nz   = (rowners[i+1] - rowners[i])*N;
1087       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1088       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1089     }
1090   } else {
1091     /* receive numeric values */
1092     vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);
1093 
1094     /* receive message of values*/
1095     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1096 
1097     /* insert into the matrix row by row (the local array is stored by column, so the row-ordered file data cannot be read directly into it) */
1098     vals_ptr = vals;
1099     for ( i=0; i<m; i++ ) {
1100       for ( j=0; j<N; j++ ) {
1101         array[i + j*m] = *vals_ptr++;
1102       }
1103     }
1104   }
1105   ierr = PetscFree(rowners);CHKERRQ(ierr);
1106   ierr = PetscFree(vals);CHKERRQ(ierr);
1107   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1108   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1109   PetscFunctionReturn(0);
1110 }
1111 
1112 
1113 #undef __FUNC__
1114 #define __FUNC__ "MatLoad_MPIDense"
1115 int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
1116 {
1117   Mat          A;
1118   Scalar       *vals,*svals;
1119   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1120   MPI_Status   status;
1121   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1122   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1123   int          tag = ((PetscObject)viewer)->tag;
1124   int          i, nz, ierr, j,rstart, rend, fd;
1125 
1126   PetscFunctionBegin;
1127   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1128   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1129   if (!rank) {
1130     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1131     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1132     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1133   }
1134 
1135   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1136   M = header[1]; N = header[2]; nz = header[3];
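      /* The binary file starts with four ints: a cookie, M, N and nz.  For a sparse
         file these are followed by the row lengths, then the column indices, then the
         numerical values, which is what the code below reads; nz equal to
         MATRIX_BINARY_FORMAT_DENSE instead marks a file whose values are stored as a
         dense, row-oriented array. */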
1137 
1138   /*
1139        Handle case where matrix is stored on disk as a dense matrix
1140   */
1141   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1142     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1143     PetscFunctionReturn(0);
1144   }
1145 
1146   /* determine ownership of all rows */
1147   m          = M/size + ((M % size) > rank);
1148   rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1149   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1150   rowners[0] = 0;
1151   for ( i=2; i<=size; i++ ) {
1152     rowners[i] += rowners[i-1];
1153   }
1154   rstart = rowners[rank];
1155   rend   = rowners[rank+1];
1156 
1157   /* distribute row lengths to all processors */
1158   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
1159   offlens = ourlens + (rend-rstart);
1160   if (!rank) {
1161     rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
1162     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1163     sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
1164     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1165     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1166     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1167   } else {
1168     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
1169   }
1170 
1171   if (!rank) {
1172     /* calculate the number of nonzeros on each processor */
1173     procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
1174     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1175     for ( i=0; i<size; i++ ) {
1176       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1177         procsnz[i] += rowlengths[j];
1178       }
1179     }
1180     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1181 
1182     /* determine max buffer needed and allocate it */
1183     maxnz = 0;
1184     for ( i=0; i<size; i++ ) {
1185       maxnz = PetscMax(maxnz,procsnz[i]);
1186     }
1187     cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
1188 
1189     /* read in my part of the matrix column indices  */
1190     nz = procsnz[0];
1191     mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
1192     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1193 
1194     /* read in everyone else's part and ship it off */
1195     for ( i=1; i<size; i++ ) {
1196       nz   = procsnz[i];
1197       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1198       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1199     }
1200     ierr = PetscFree(cols);CHKERRQ(ierr);
1201   } else {
1202     /* determine buffer space needed for message */
1203     nz = 0;
1204     for ( i=0; i<m; i++ ) {
1205       nz += ourlens[i];
1206     }
1207     mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
1208 
1209     /* receive message of column indices*/
1210     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1211     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1212     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1213   }
1214 
1215   /* loop over local rows, determining number of off diagonal entries */
1216   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1217   jj = 0;
1218   for ( i=0; i<m; i++ ) {
1219     for ( j=0; j<ourlens[i]; j++ ) {
1220       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1221       jj++;
1222     }
1223   }
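      /* Note that ourlens is only shrunk by offlens and restored again right after the
         matrix is created below; MatCreateMPIDense() takes no preallocation row lengths,
         so the off-diagonal counts have no effect for the dense loader and appear to be
         left over from the sparse loaders. */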
1224 
1225   /* create our matrix */
1226   for ( i=0; i<m; i++ ) {
1227     ourlens[i] -= offlens[i];
1228   }
1229   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1230   A = *newmat;
1231   for ( i=0; i<m; i++ ) {
1232     ourlens[i] += offlens[i];
1233   }
1234 
1235   if (!rank) {
1236     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);
1237 
1238     /* read in my part of the matrix numerical values  */
1239     nz = procsnz[0];
1240     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1241 
1242     /* insert into matrix */
1243     jj      = rstart;
1244     smycols = mycols;
1245     svals   = vals;
1246     for ( i=0; i<m; i++ ) {
1247       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1248       smycols += ourlens[i];
1249       svals   += ourlens[i];
1250       jj++;
1251     }
1252 
1253     /* read in other processors and ship out */
1254     for ( i=1; i<size; i++ ) {
1255       nz   = procsnz[i];
1256       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1257       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1258     }
1259     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1260   } else {
1261     /* receive numeric values */
1262     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);
1263 
1264     /* receive message of values*/
1265     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1266     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1267     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1268 
1269     /* insert into matrix */
1270     jj      = rstart;
1271     smycols = mycols;
1272     svals   = vals;
1273     for ( i=0; i<m; i++ ) {
1274       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1275       smycols += ourlens[i];
1276       svals   += ourlens[i];
1277       jj++;
1278     }
1279   }
1280   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1281   ierr = PetscFree(vals);CHKERRQ(ierr);
1282   ierr = PetscFree(mycols);CHKERRQ(ierr);
1283   ierr = PetscFree(rowners);CHKERRQ(ierr);
1284 
1285   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1286   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1287   PetscFunctionReturn(0);
1288 }
1289 
1290 
1291 
1292 
1293 
1294