1 /*$Id: mpidense.c,v 1.144 2000/09/28 21:10:58 bsmith Exp bsmith $*/
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 #include "src/mat/impls/dense/mpi/mpidense.h"
8 #include "src/vec/vecimpl.h"
9 
10 EXTERN_C_BEGIN
11 #undef __FUNC__
12 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPIDense"
13 int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
14 {
15   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
16   int          m = A->m,rstart = mdn->rstart,rank,ierr;
17   Scalar       *array;
18   MPI_Comm     comm;
19 
20   PetscFunctionBegin;
21   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
22 
23   /* The reuse aspect is not implemented efficiently */
24   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
25 
26   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
27   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
28   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
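     /* the local m x N array is stored column major, so the m x m diagonal block
        occupies columns rstart..rstart+m-1 and begins at offset m*rstart */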
29   ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
30   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
31   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
32   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
33 
34   *iscopy = PETSC_TRUE;
35   PetscFunctionReturn(0);
36 }
37 EXTERN_C_END
38 
39 EXTERN int MatSetUpMultiply_MPIDense(Mat);
40 
41 #undef __FUNC__
42 #define __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIDense"
43 int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
44 {
45   Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
46   int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
47   PetscTruth   roworiented = A->roworiented;
48 
49   PetscFunctionBegin;
50   for (i=0; i<m; i++) {
51     if (idxm[i] < 0) continue;
52     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
53     if (idxm[i] >= rstart && idxm[i] < rend) {
54       row = idxm[i] - rstart;
55       if (roworiented) {
56         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
57       } else {
58         for (j=0; j<n; j++) {
59           if (idxn[j] < 0) continue;
60           if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
61           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
62         }
63       }
64     } else {
65       if (!A->donotstash) {
66         if (roworiented) {
67           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
68         } else {
69           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
70         }
71       }
72     }
73   }
74   PetscFunctionReturn(0);
75 }
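/*
   Usage sketch (not part of the original source; M, N, row, cols and vals below are
   placeholders): any process may set entries of any global row.  Entries for rows
   owned by another process are stashed by the routine above and forwarded to their
   owner during MatAssemblyBegin()/MatAssemblyEnd().

     Mat    A;
     int    row = 0,cols[2] = {0,1},ierr;
     Scalar vals[2] = {1.0,2.0};

     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatSetValues(A,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/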
76 
77 #undef __FUNC__
78 #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPIDense"
79 int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
80 {
81   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
82   int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;
83 
84   PetscFunctionBegin;
85   for (i=0; i<m; i++) {
86     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
87     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
88     if (idxm[i] >= rstart && idxm[i] < rend) {
89       row = idxm[i] - rstart;
90       for (j=0; j<n; j++) {
91         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
92         if (idxn[j] >= mat->N) {
93           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
94         }
95         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
96       }
97     } else {
98       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
99     }
100   }
101   PetscFunctionReturn(0);
102 }
103 
104 #undef __FUNC__
105 #define __FUNC__ /*<a name=""></a>*/"MatGetArray_MPIDense"
106 int MatGetArray_MPIDense(Mat A,Scalar **array)
107 {
108   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
109   int          ierr;
110 
111   PetscFunctionBegin;
112   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
113   PetscFunctionReturn(0);
114 }
115 
116 #undef __FUNC__
117 #define __FUNC__ /*<a name=""></a>*/"MatGetSubMatrix_MPIDense"
118 static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
119 {
120   Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
121   Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
122   int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols,rank;
123   Scalar       *av,*bv,*v = lmat->v;
124   Mat          newmat;
125 
126   PetscFunctionBegin;
127   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
128   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
129   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
130   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
131   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
132 
133   /* No parallel redistribution is currently supported! We should really check each index set
134      to confirm that it is OK. ... Currently this supports only a submatrix with the same
135      partitioning as the original matrix! */
136 
137   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
138   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
139 
140   /* Check submatrix call */
141   if (scall == MAT_REUSE_MATRIX) {
142     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
143     /* Really need to test rows and column sizes! */
144     newmat = *B;
145   } else {
146     /* Create and fill new matrix */
147     ierr = MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);CHKERRQ(ierr);
148   }
149 
150   /* Now extract the data pointers and do the copy, column at a time */
151   newmatd = (Mat_MPIDense*)newmat->data;
152   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
153 
154   for (i=0; i<ncols; i++) {
155     av = v + nlrows*icol[i];
156     for (j=0; j<nrows; j++) {
157       *bv++ = av[irow[j] - rstart];
158     }
159   }
160 
161   /* Assemble the matrices so that the correct flags are set */
162   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
163   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
164 
165   /* Free work space */
166   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
167   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
168   *B = newmat;
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNC__
173 #define __FUNC__ /*<a name=""></a>*/"MatRestoreArray_MPIDense"
174 int MatRestoreArray_MPIDense(Mat A,Scalar **array)
175 {
176   PetscFunctionBegin;
177   PetscFunctionReturn(0);
178 }
179 
180 #undef __FUNC__
181 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPIDense"
182 int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
183 {
184   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
185   MPI_Comm     comm = mat->comm;
186   int          ierr,nstash,reallocs;
187   InsertMode   addv;
188 
189   PetscFunctionBegin;
190   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
191   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
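     /* a bitwise OR over all processes yields ADD_VALUES|INSERT_VALUES exactly when some
        processes inserted while others added, which is the illegal mix rejected below */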
192   if (addv == (ADD_VALUES|INSERT_VALUES)) {
193     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
194   }
195   mat->insertmode = addv; /* in case this processor had no cache */
196 
197   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
198   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
199   PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
200   PetscFunctionReturn(0);
201 }
202 
203 #undef __FUNC__
204 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPIDense"
205 int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
206 {
207   Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
208   int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
209   Scalar       *val;
210   InsertMode   addv=mat->insertmode;
211 
212   PetscFunctionBegin;
213   /* receive the stashed off-processor entries and insert them */
214   while (1) {
215     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
216     if (!flg) break;
217 
218     for (i=0; i<n;) {
219       /* Now identify the consecutive vals belonging to the same row */
220       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
221       if (j < n) ncols = j-i;
222       else       ncols = n-i;
223       /* Now assemble all these values with a single function call */
224       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
225       i = j;
226     }
227   }
228   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
229 
230   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
231   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
232 
233   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
234     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
235   }
236   PetscFunctionReturn(0);
237 }
238 
239 #undef __FUNC__
240 #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPIDense"
241 int MatZeroEntries_MPIDense(Mat A)
242 {
243   int          ierr;
244   Mat_MPIDense *l = (Mat_MPIDense*)A->data;
245 
246   PetscFunctionBegin;
247   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
248   PetscFunctionReturn(0);
249 }
250 
251 #undef __FUNC__
252 #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPIDense"
253 int MatGetBlockSize_MPIDense(Mat A,int *bs)
254 {
255   PetscFunctionBegin;
256   *bs = 1;
257   PetscFunctionReturn(0);
258 }
259 
260 /* The code does not handle the diagonal entries correctly unless the
261    matrix is square and the column and row ownerships are identical.
262    This is a BUG. The only way to fix it seems to be to access the
263    local matrix mdn->A directly instead of going through the
264    MatZeroRows() routine.
265 */
266 #undef __FUNC__
267 #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPIDense"
268 int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
269 {
270   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
271   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
272   int            *procs,*nprocs,j,found,idx,nsends,*work;
273   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
274   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
275   int            *lens,imdex,*lrows,*values;
276   MPI_Comm       comm = A->comm;
277   MPI_Request    *send_waits,*recv_waits;
278   MPI_Status     recv_status,*send_status;
279   IS             istmp;
280 
281   PetscFunctionBegin;
282   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
283   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
284 
285   /*  first count number of contributors to each processor */
286   nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
287   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
288   procs  = nprocs + size;
289   owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
290   for (i=0; i<N; i++) {
291     idx = rows[i];
292     found = 0;
293     for (j=0; j<size; j++) {
294       if (idx >= owners[j] && idx < owners[j+1]) {
295         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
296       }
297     }
298     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
299   }
300   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
301 
302   /* inform other processors of number of messages and max length*/
303   work   = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
304   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
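  /* work[rank] now holds the length of the longest single message destined for this
     process (used below to size each receive buffer); work[size+rank] holds the number
     of messages this process will receive */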
305   nmax   = work[rank];
306   nrecvs = work[size+rank];
307   ierr   = PetscFree(work);CHKERRQ(ierr);
308 
309   /* post receives:   */
310   rvalues    = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
311   recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
312   for (i=0; i<nrecvs; i++) {
313     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
314   }
315 
316   /* do sends:
317       1) starts[i] gives the starting index in svalues for stuff going to
318          the ith processor
319   */
320   svalues    = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
321   send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
322   starts     = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
323   starts[0]  = 0;
324   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
325   for (i=0; i<N; i++) {
326     svalues[starts[owner[i]]++] = rows[i];
327   }
328   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
329 
330   starts[0] = 0;
331   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
332   count = 0;
333   for (i=0; i<size; i++) {
334     if (procs[i]) {
335       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
336     }
337   }
338   ierr = PetscFree(starts);CHKERRQ(ierr);
339 
340   base = owners[rank];
341 
342   /*  wait on receives */
343   lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
344   source = lens + nrecvs;
345   count  = nrecvs; slen = 0;
346   while (count) {
347     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
348     /* unpack receives into our local space */
349     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
350     source[imdex]  = recv_status.MPI_SOURCE;
351     lens[imdex]  = n;
352     slen += n;
353     count--;
354   }
355   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
356 
357   /* unpack the received global row indices into a list of local row numbers */
358   lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
359   count = 0;
360   for (i=0; i<nrecvs; i++) {
361     values = rvalues + i*nmax;
362     for (j=0; j<lens[i]; j++) {
363       lrows[count++] = values[j] - base;
364     }
365   }
366   ierr = PetscFree(rvalues);CHKERRQ(ierr);
367   ierr = PetscFree(lens);CHKERRQ(ierr);
368   ierr = PetscFree(owner);CHKERRQ(ierr);
369   ierr = PetscFree(nprocs);CHKERRQ(ierr);
370 
371   /* actually zap the local rows */
372   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
373   PLogObjectParent(A,istmp);
374   ierr = PetscFree(lrows);CHKERRQ(ierr);
375   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
376   ierr = ISDestroy(istmp);CHKERRQ(ierr);
377 
378   /* wait on sends */
379   if (nsends) {
380     send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
381     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
382     ierr = PetscFree(send_status);CHKERRQ(ierr);
383   }
384   ierr = PetscFree(send_waits);CHKERRQ(ierr);
385   ierr = PetscFree(svalues);CHKERRQ(ierr);
386 
387   PetscFunctionReturn(0);
388 }
389 
390 #undef __FUNC__
391 #define __FUNC__ /*<a name=""></a>*/"MatMult_MPIDense"
392 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
393 {
394   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
395   int          ierr;
396 
397   PetscFunctionBegin;
398   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
399   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
400   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
401   PetscFunctionReturn(0);
402 }
403 
404 #undef __FUNC__
405 #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPIDense"
406 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
407 {
408   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
409   int          ierr;
410 
411   PetscFunctionBegin;
412   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
413   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
414   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
415   PetscFunctionReturn(0);
416 }
417 
418 #undef __FUNC__
419 #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPIDense"
420 int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
421 {
422   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
423   int          ierr;
424   Scalar       zero = 0.0;
425 
426   PetscFunctionBegin;
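  /* form the local transpose product into lvec (one length-N contribution per process),
     then sum the contributions into the distributed yy with a reverse scatter-add */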
427   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
428   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
429   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
430   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
431   PetscFunctionReturn(0);
432 }
433 
434 #undef __FUNC__
435 #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPIDense"
436 int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
437 {
438   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
439   int          ierr;
440 
441   PetscFunctionBegin;
442   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
443   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
444   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
445   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
446   PetscFunctionReturn(0);
447 }
448 
449 #undef __FUNC__
450 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPIDense"
451 int MatGetDiagonal_MPIDense(Mat A,Vec v)
452 {
453   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
454   Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
455   int          ierr,len,i,n,m = A->m,radd;
456   Scalar       *x,zero = 0.0;
457 
458   PetscFunctionBegin;
459   ierr = VecSet(&zero,v);CHKERRQ(ierr);
460   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
461   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
462   if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
463   len  = PetscMin(a->A->m,a->A->n);
464   radd = a->rstart*m;
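  /* the local array is stored column major with m rows, so the diagonal entry of local
     row i (global column a->rstart+i) is located at v[(a->rstart+i)*m + i] */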
465   for (i=0; i<len; i++) {
466     x[i] = aloc->v[radd + i*m + i];
467   }
468   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
469   PetscFunctionReturn(0);
470 }
471 
472 #undef __FUNC__
473 #define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPIDense"
474 int MatDestroy_MPIDense(Mat mat)
475 {
476   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
477   int          ierr;
478 
479   PetscFunctionBegin;
480 
481 #if defined(PETSC_USE_LOG)
482   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
483 #endif
484   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
485   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
486   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
487   if (mdn->lvec)   VecDestroy(mdn->lvec);
488   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
489   if (mdn->factor) {
490     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
491     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
492     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
493     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
494   }
495   ierr = PetscFree(mdn);CHKERRQ(ierr);
496   PetscFunctionReturn(0);
497 }
498 
499 #undef __FUNC__
500 #define __FUNC__ /*<a name=""></a>*/"MatView_MPIDense_Binary"
501 static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
502 {
503   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
504   int          ierr;
505 
506   PetscFunctionBegin;
507   if (mdn->size == 1) {
508     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
509   }
510   else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
511   PetscFunctionReturn(0);
512 }
513 
514 #undef __FUNC__
515 #define __FUNC__ /*<a name=""></a>*/"MatView_MPIDense_ASCIIorDraworSocket"
516 static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,Viewer viewer)
517 {
518   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
519   int          ierr,format,size = mdn->size,rank = mdn->rank;
520   ViewerType   vtype;
521   PetscTruth   isascii,isdraw;
522   Viewer       sviewer;
523 
524   PetscFunctionBegin;
525   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
526   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
527   if (isascii) {
528     ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
529     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
530     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
531       MatInfo info;
532       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
533       ierr = ViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
534                    (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
535       ierr = ViewerFlush(viewer);CHKERRQ(ierr);
536       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
537       PetscFunctionReturn(0);
538     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
539       PetscFunctionReturn(0);
540     }
541   } else if (isdraw) {
542     Draw       draw;
543     PetscTruth isnull;
544 
545     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
546     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr);
547     if (isnull) PetscFunctionReturn(0);
548   }
549 
550   if (size == 1) {
551     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
552   } else {
553     /* assemble the entire matrix onto first processor. */
554     Mat          A;
555     int          M = mat->M,N = mat->N,m,row,i,nz,*cols;
556     Scalar       *vals;
557 
558     if (!rank) {
559       ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
560     } else {
561       ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
562     }
563     PLogObjectParent(mat,A);
564 
565     /* Copy the matrix ... This isn't the most efficient means,
566        but it's quick for now */
567     row = mdn->rstart; m = mdn->A->m;
568     for (i=0; i<m; i++) {
569       ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
570       ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
571       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
572       row++;
573     }
574 
575     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
576     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
577     ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
578     if (!rank) {
579       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
580     }
581     ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
582     ierr = ViewerFlush(viewer);CHKERRQ(ierr);
583     ierr = MatDestroy(A);CHKERRQ(ierr);
584   }
585   PetscFunctionReturn(0);
586 }
587 
588 #undef __FUNC__
589 #define __FUNC__ /*<a name=""></a>*/"MatView_MPIDense"
590 int MatView_MPIDense(Mat mat,Viewer viewer)
591 {
592   int        ierr;
593   PetscTruth isascii,isbinary,isdraw,issocket;
594 
595   PetscFunctionBegin;
596 
597   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
598   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
599   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
600   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
601 
602   if (isascii || issocket || isdraw) {
603     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
604   } else if (isbinary) {
605     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
606   } else {
607     SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
608   }
609   PetscFunctionReturn(0);
610 }
611 
612 #undef __FUNC__
613 #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPIDense"
614 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
615 {
616   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
617   Mat          mdn = mat->A;
618   int          ierr;
619   PetscReal    isend[5],irecv[5];
620 
621   PetscFunctionBegin;
622   info->rows_global    = (double)A->M;
623   info->columns_global = (double)A->N;
624   info->rows_local     = (double)A->m;
625   info->columns_local  = (double)A->N;
626   info->block_size     = 1.0;
627   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
628   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
629   isend[3] = info->memory;  isend[4] = info->mallocs;
630   if (flag == MAT_LOCAL) {
631     info->nz_used      = isend[0];
632     info->nz_allocated = isend[1];
633     info->nz_unneeded  = isend[2];
634     info->memory       = isend[3];
635     info->mallocs      = isend[4];
636   } else if (flag == MAT_GLOBAL_MAX) {
637     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
638     info->nz_used      = irecv[0];
639     info->nz_allocated = irecv[1];
640     info->nz_unneeded  = irecv[2];
641     info->memory       = irecv[3];
642     info->mallocs      = irecv[4];
643   } else if (flag == MAT_GLOBAL_SUM) {
644     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
645     info->nz_used      = irecv[0];
646     info->nz_allocated = irecv[1];
647     info->nz_unneeded  = irecv[2];
648     info->memory       = irecv[3];
649     info->mallocs      = irecv[4];
650   }
651   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
652   info->fill_ratio_needed = 0;
653   info->factor_mallocs    = 0;
654   PetscFunctionReturn(0);
655 }
656 
657 #undef __FUNC__
658 #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPIDense"
659 int MatSetOption_MPIDense(Mat A,MatOption op)
660 {
661   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
662   int          ierr;
663 
664   PetscFunctionBegin;
665   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
666       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
667       op == MAT_NEW_NONZERO_LOCATION_ERR ||
668       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
669       op == MAT_COLUMNS_SORTED ||
670       op == MAT_COLUMNS_UNSORTED) {
671         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
672   } else if (op == MAT_ROW_ORIENTED) {
673         a->roworiented = PETSC_TRUE;
674         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
675   } else if (op == MAT_ROWS_SORTED ||
676              op == MAT_ROWS_UNSORTED ||
677              op == MAT_SYMMETRIC ||
678              op == MAT_STRUCTURALLY_SYMMETRIC ||
679              op == MAT_YES_NEW_DIAGONALS ||
680              op == MAT_USE_HASH_TABLE) {
681     PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
682   } else if (op == MAT_COLUMN_ORIENTED) {
683     a->roworiented = PETSC_FALSE;
684     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
685   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
686     a->donotstash = PETSC_TRUE;
687   } else if (op == MAT_NO_NEW_DIAGONALS) {
688     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
689   } else {
690     SETERRQ(PETSC_ERR_SUP,"unknown option");
691   }
692   PetscFunctionReturn(0);
693 }
694 
695 #undef __FUNC__
696 #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPIDense"
697 int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
698 {
699   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
700 
701   PetscFunctionBegin;
702   if (m) *m = mat->rstart;
703   if (n) *n = mat->rend;
704   PetscFunctionReturn(0);
705 }
706 
707 #undef __FUNC__
708 #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPIDense"
709 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
710 {
711   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
712   int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;
713 
714   PetscFunctionBegin;
715   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows currently supported");
716   lrow = row - rstart;
717   ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
718   PetscFunctionReturn(0);
719 }
720 
721 #undef __FUNC__
722 #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPIDense"
723 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
724 {
725   int ierr;
726 
727   PetscFunctionBegin;
728   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
729   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
730   PetscFunctionReturn(0);
731 }
732 
733 #undef __FUNC__
734 #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPIDense"
735 int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
736 {
737   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
738   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
739   Scalar       *l,*r,x,*v;
740   int          ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;
741 
742   PetscFunctionBegin;
743   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
744   if (ll) {
745     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
746     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
747     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
748     for (i=0; i<m; i++) {
749       x = l[i];
750       v = mat->v + i;
751       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
752     }
753     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
754     PLogFlops(n*m);
755   }
756   if (rr) {
757     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
758     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
759     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
760     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
761     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
762     for (i=0; i<n; i++) {
763       x = r[i];
764       v = mat->v + i*m;
765       for (j=0; j<m; j++) { (*v++) *= x;}
766     }
767     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
768     PLogFlops(n*m);
769   }
770   PetscFunctionReturn(0);
771 }
772 
773 #undef __FUNC__
774 #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPIDense"
775 int MatNorm_MPIDense(Mat A,NormType type,PetscReal *norm)
776 {
777   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
778   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
779   int          ierr,i,j;
780   PetscReal    sum = 0.0;
781   Scalar       *v = mat->v;
782 
783   PetscFunctionBegin;
784   if (mdn->size == 1) {
785     ierr =  MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
786   } else {
787     if (type == NORM_FROBENIUS) {
788       for (i=0; i<mdn->A->n*mdn->A->m; i++) {
789 #if defined(PETSC_USE_COMPLEX)
790         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
791 #else
792         sum += (*v)*(*v); v++;
793 #endif
794       }
795       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
796       *norm = sqrt(*norm);
797       PLogFlops(2*mdn->A->n*mdn->A->m);
798     } else if (type == NORM_1) {
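      /* 1-norm: each process forms the absolute column sums of its local rows, the sums
         are added across processes, and the largest resulting column sum is taken */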
799       PetscReal *tmp,*tmp2;
800       tmp  = (PetscReal*)PetscMalloc(2*A->N*sizeof(PetscReal));CHKPTRQ(tmp);
801       tmp2 = tmp + A->N;
802       ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
803       *norm = 0.0;
804       v = mat->v;
805       for (j=0; j<mdn->A->n; j++) {
806         for (i=0; i<mdn->A->m; i++) {
807           tmp[j] += PetscAbsScalar(*v);  v++;
808         }
809       }
810       ierr = MPI_Allreduce(tmp,tmp2,A->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
811       for (j=0; j<A->N; j++) {
812         if (tmp2[j] > *norm) *norm = tmp2[j];
813       }
814       ierr = PetscFree(tmp);CHKERRQ(ierr);
815       PLogFlops(A->n*A->m);
816     } else if (type == NORM_INFINITY) { /* max row norm */
817       PetscReal ntemp;
818       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
819       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
820     } else {
821       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
822     }
823   }
824   PetscFunctionReturn(0);
825 }
826 
827 #undef __FUNC__
828 #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPIDense"
829 int MatTranspose_MPIDense(Mat A,Mat *matout)
830 {
831   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
832   Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
833   Mat          B;
834   int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
835   int          j,i,ierr;
836   Scalar       *v;
837 
838   PetscFunctionBegin;
839   if (!matout && M != N) {
840     SETERRQ(PETSC_ERR_SUP,"In-place transpose is supported only for square matrices");
841   }
842   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);
843 
844   m = a->A->m; n = a->A->n; v = Aloc->v;
845   rwork = (int*)PetscMalloc((m+1)*sizeof(int));CHKPTRQ(rwork);  /* holds m global row indices */
846   for (j=0; j<n; j++) {
847     for (i=0; i<m; i++) rwork[i] = rstart + i;
848     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
849     v   += m;
850   }
851   ierr = PetscFree(rwork);CHKERRQ(ierr);
852   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
853   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
854   if (matout) {
855     *matout = B;
856   } else {
857     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
858   }
859   PetscFunctionReturn(0);
860 }
861 
862 #include "petscblaslapack.h"
863 #undef __FUNC__
864 #define __FUNC__ /*<a name=""></a>*/"MatScale_MPIDense"
865 int MatScale_MPIDense(Scalar *alpha,Mat inA)
866 {
867   Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
868   Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
869   int          one = 1,nz;
870 
871   PetscFunctionBegin;
872   nz = inA->m*inA->N;
873   BLscal_(&nz,alpha,a->v,&one);
874   PLogFlops(nz);
875   PetscFunctionReturn(0);
876 }
877 
878 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
879 EXTERN int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);
880 
881 #undef __FUNC__
882 #define __FUNC__ /*<a name="MatSetUpPreallocation_MPIDense"></a>*/"MatSetUpPreallocation_MPIDense"
883 int MatSetUpPreallocation_MPIDense(Mat A)
884 {
885   int        ierr;
886 
887   PetscFunctionBegin;
888   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
889   PetscFunctionReturn(0);
890 }
891 
892 /* -------------------------------------------------------------------*/
893 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
894        MatGetRow_MPIDense,
895        MatRestoreRow_MPIDense,
896        MatMult_MPIDense,
897        MatMultAdd_MPIDense,
898        MatMultTranspose_MPIDense,
899        MatMultTransposeAdd_MPIDense,
900        0,
901        0,
902        0,
903        0,
904        0,
905        0,
906        0,
907        MatTranspose_MPIDense,
908        MatGetInfo_MPIDense,0,
909        MatGetDiagonal_MPIDense,
910        MatDiagonalScale_MPIDense,
911        MatNorm_MPIDense,
912        MatAssemblyBegin_MPIDense,
913        MatAssemblyEnd_MPIDense,
914        0,
915        MatSetOption_MPIDense,
916        MatZeroEntries_MPIDense,
917        MatZeroRows_MPIDense,
918        0,
919        0,
920        0,
921        0,
922        MatSetUpPreallocation_MPIDense,
923        0,
924        MatGetOwnershipRange_MPIDense,
925        0,
926        0,
927        MatGetArray_MPIDense,
928        MatRestoreArray_MPIDense,
929        MatDuplicate_MPIDense,
930        0,
931        0,
932        0,
933        0,
934        0,
935        MatGetSubMatrices_MPIDense,
936        0,
937        MatGetValues_MPIDense,
938        0,
939        0,
940        MatScale_MPIDense,
941        0,
942        0,
943        0,
944        MatGetBlockSize_MPIDense,
945        0,
946        0,
947        0,
948        0,
949        0,
950        0,
951        0,
952        0,
953        0,
954        MatGetSubMatrix_MPIDense,
955        MatDestroy_MPIDense,
956        MatView_MPIDense,
957        MatGetMaps_Petsc};
958 
959 EXTERN_C_BEGIN
960 #undef __FUNC__
961 #define __FUNC__ /*<a name=""></a>*/"MatCreate_MPIDense"
962 int MatCreate_MPIDense(Mat mat)
963 {
964   Mat_MPIDense *a;
965   int          ierr,i;
966 
967   PetscFunctionBegin;
968   mat->data         = (void*)(a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
969   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
970   mat->factor       = 0;
971   mat->mapping      = 0;
972 
973   a->factor       = 0;
974   mat->insertmode = NOT_SET_VALUES;
975   ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
976   ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);
977 
978   ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
979   ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
980   a->nvec = mat->n;
981 
982   /* the information in the maps duplicates the information computed below, eventually
983      we should remove the duplicate information that is not contained in the maps */
984   ierr = MapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
985   ierr = MapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);
986 
987   /* build local table of row and column ownerships */
988   a->rowners = (int*)PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
989   a->cowners = a->rowners + a->size + 1;
990   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
991   ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
992   a->rowners[0] = 0;
993   for (i=2; i<=a->size; i++) {
994     a->rowners[i] += a->rowners[i-1];
995   }
996   a->rstart = a->rowners[a->rank];
997   a->rend   = a->rowners[a->rank+1];
998   ierr      = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
999   a->cowners[0] = 0;
1000   for (i=2; i<=a->size; i++) {
1001     a->cowners[i] += a->cowners[i-1];
1002   }
1003 
1004   /* build cache for off array entries formed */
1005   a->donotstash = PETSC_FALSE;
1006   ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);
1007 
1008   /* stuff used for matrix vector multiply */
1009   a->lvec        = 0;
1010   a->Mvctx       = 0;
1011   a->roworiented = PETSC_TRUE;
1012 
1013   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1014                                      "MatGetDiagonalBlock_MPIDense",
1015                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1016   PetscFunctionReturn(0);
1017 }
1018 EXTERN_C_END
1019 
1020 #undef __FUNC__
1021 #define __FUNC__ /*<a name=""></a>*/"MatMPIDenseSetPreallocation"
1022 /*@C
1023    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1024 
1025    Not collective
1026 
1027    Input Parameters:
1028 .  A - the matrix
1029 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1030    to control all matrix memory allocation.
1031 
1032    Notes:
1033    The dense format is fully compatible with standard Fortran 77
1034    storage by columns.
1035 
1036    The data input variable is intended primarily for Fortran programmers
1037    who wish to allocate their own matrix memory space.  Most users should
1038    set data=PETSC_NULL.
1039 
1040    Level: intermediate
1041 
1042 .keywords: matrix,dense, parallel
1043 
1044 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1045 @*/
1046 int MatMPIDenseSetPreallocation(Mat mat,Scalar *data)
1047 {
1048   Mat_MPIDense *a;
1049   int          ierr;
1050   PetscTruth   flg2;
1051 
1052   PetscFunctionBegin;
1053   ierr = PetscTypeCompare((PetscObject)mat,MATMPIDENSE,&flg2);CHKERRQ(ierr);
1054   if (!flg2) PetscFunctionReturn(0);
1055   mat->preallocated = PETSC_TRUE;
1056   /* Note: for now, when the data array is supplied, we assume that the user has
1057      correctly allocated the local dense storage space.  We should add error checking. */
1058 
1059   a    = (Mat_MPIDense*)mat->data;
1060   ierr = MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);CHKERRQ(ierr);
1061   PLogObjectParent(mat,a->A);
1062   PetscFunctionReturn(0);
1063 }
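/*
   Usage sketch (not part of the original source; m, N and user_v are placeholders): the
   optional data argument lets the caller supply the local storage, which must be a
   column major array holding the local rows of all N columns (m*N entries).

     Scalar *user_v;
     Mat    A;
     int    ierr;

     user_v = (Scalar*)PetscMalloc(m*N*sizeof(Scalar));CHKPTRQ(user_v);
     ierr   = MatCreate(PETSC_COMM_WORLD,m,PETSC_DECIDE,PETSC_DECIDE,N,&A);CHKERRQ(ierr);
     ierr   = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr   = MatMPIDenseSetPreallocation(A,user_v);CHKERRQ(ierr);
*/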
1064 
1065 #undef __FUNC__
1066 #define __FUNC__ /*<a name=""></a>*/"MatCreateMPIDense"
1067 /*@C
1068    MatCreateMPIDense - Creates a parallel matrix in dense format.
1069 
1070    Collective on MPI_Comm
1071 
1072    Input Parameters:
1073 +  comm - MPI communicator
1074 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1075 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1076 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1077 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1078 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1079    to control all matrix memory allocation.
1080 
1081    Output Parameter:
1082 .  A - the matrix
1083 
1084    Notes:
1085    The dense format is fully compatible with standard Fortran 77
1086    storage by columns.
1087 
1088    The data input variable is intended primarily for Fortran programmers
1089    who wish to allocate their own matrix memory space.  Most users should
1090    set data=PETSC_NULL.
1091 
1092    The user MUST specify either the local or global matrix dimensions
1093    (possibly both).
1094 
1095    Level: intermediate
1096 
1097 .keywords: matrix,dense, parallel
1098 
1099 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1100 @*/
1101 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
1102 {
1103   int ierr,size;
1104 
1105   PetscFunctionBegin;
1106   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1107   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1108   if (size > 1) {
1109     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1110     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1111   } else {
1112     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1113     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1114   }
1115   PetscFunctionReturn(0);
1116 }
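/*
   Usage sketch (not part of the original source; M and N are placeholders): give either
   the local sizes, the global sizes, or both; PETSC_DECIDE fills in the rest.  Note that
   on a single process this routine silently creates a MATSEQDENSE matrix instead.

     Mat A;
     int ierr;

     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
     ...  fill with MatSetValues() and assemble  ...
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/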
1117 
1118 #undef __FUNC__
1119 #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPIDense"
1120 static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1121 {
1122   Mat          mat;
1123   Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1124   int          ierr;
1125   FactorCtx    *factor;
1126 
1127   PetscFunctionBegin;
1128   *newmat       = 0;
1129   ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
1130   ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr);
1131   mat->data         = (void*)(a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
1132   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1133   mat->factor       = A->factor;
1134   mat->assembled    = PETSC_TRUE;
1135   mat->preallocated = PETSC_TRUE;
1136 
1137   if (oldmat->factor) {
1138     a->factor = (FactorCtx*)(factor = PetscNew(FactorCtx));CHKPTRQ(factor);
1139     /* copy factor contents ... add this code! */
1140   } else a->factor = 0;
1141 
1142   a->rstart       = oldmat->rstart;
1143   a->rend         = oldmat->rend;
1144   a->size         = oldmat->size;
1145   a->rank         = oldmat->rank;
1146   mat->insertmode = NOT_SET_VALUES;
1147   a->nvec         = oldmat->nvec;
1148   a->donotstash   = oldmat->donotstash;
1149   a->rowners = (int*)PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners);
1150   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1151   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
1152   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1153 
1154   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1155   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1156   PLogObjectParent(mat,a->A);
1157   *newmat = mat;
1158   PetscFunctionReturn(0);
1159 }
1160 
1161 #include "petscsys.h"
1162 
1163 #undef __FUNC__
1164 #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPIDense_DenseInFile"
1165 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
1166 {
1167   int        *rowners,i,size,rank,m,ierr,nz,j;
1168   Scalar     *array,*vals,*vals_ptr;
1169   MPI_Status status;
1170 
1171   PetscFunctionBegin;
1172   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1173   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1174 
1175   /* determine ownership of all rows */
1176   m          = M/size + ((M % size) > rank);
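  /* e.g. M = 10 rows over size = 3 processes gives m = 4, 3, 3 on ranks 0, 1, 2 */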
1177   rowners    = (int*)PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1178   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1179   rowners[0] = 0;
1180   for (i=2; i<=size; i++) {
1181     rowners[i] += rowners[i-1];
1182   }
1183 
1184   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1185   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1186 
1187   if (!rank) {
1188     vals = (Scalar*)PetscMalloc(m*N*sizeof(Scalar));CHKPTRQ(vals);
1189 
1190     /* read in my part of the matrix numerical values  */
1191     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1192 
1193     /* insert into the matrix by row (this is why we cannot read directly into the matrix array) */
1194     vals_ptr = vals;
1195     for (i=0; i<m; i++) {
1196       for (j=0; j<N; j++) {
1197         array[i + j*m] = *vals_ptr++;
1198       }
1199     }
1200 
1201     /* read in the other processors' values and ship them out */
1202     for (i=1; i<size; i++) {
1203       nz   = (rowners[i+1] - rowners[i])*N;
1204       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1205       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1206     }
1207   } else {
1208     /* receive numeric values */
1209     vals = (Scalar*)PetscMalloc(m*N*sizeof(Scalar));CHKPTRQ(vals);
1210 
1211     /* receive message of values */
1212     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1213 
1214     /* insert into the matrix by row (this is why we cannot read directly into the matrix array) */
1215     vals_ptr = vals;
1216     for (i=0; i<m; i++) {
1217       for (j=0; j<N; j++) {
1218         array[i + j*m] = *vals_ptr++;
1219       }
1220     }
1221   }
1222   ierr = PetscFree(rowners);CHKERRQ(ierr);
1223   ierr = PetscFree(vals);CHKERRQ(ierr);
1224   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1225   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1226   PetscFunctionReturn(0);
1227 }
1228 
1229 EXTERN_C_BEGIN
1230 #undef __FUNC__
1231 #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPIDense"
1232 int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
1233 {
1234   Mat          A;
1235   Scalar       *vals,*svals;
1236   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1237   MPI_Status   status;
1238   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1239   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1240   int          tag = ((PetscObject)viewer)->tag;
1241   int          i,nz,ierr,j,rstart,rend,fd;
1242 
1243   PetscFunctionBegin;
1244   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1245   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1246   if (!rank) {
1247     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1248     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1249     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1250   }
1251 
1252   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1253   M = header[1]; N = header[2]; nz = header[3];
1254 
1255   /*
1256        Handle case where matrix is stored on disk as a dense matrix
1257   */
1258   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1259     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1260     PetscFunctionReturn(0);
1261   }
1262 
1263   /* determine ownership of all rows */
1264   m          = M/size + ((M % size) > rank);
1265   rowners    = (int*)PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1266   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1267   rowners[0] = 0;
1268   for (i=2; i<=size; i++) {
1269     rowners[i] += rowners[i-1];
1270   }
1271   rstart = rowners[rank];
1272   rend   = rowners[rank+1];
1273 
1274   /* distribute row lengths to all processors */
1275   ourlens = (int*)PetscMalloc(2*(rend-rstart)*sizeof(int));CHKPTRQ(ourlens);
1276   offlens = ourlens + (rend-rstart);
1277   if (!rank) {
1278     rowlengths = (int*)PetscMalloc(M*sizeof(int));CHKPTRQ(rowlengths);
1279     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1280     sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
1281     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1282     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1283     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1284   } else {
1285     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1286   }
1287 
1288   if (!rank) {
1289     /* calculate the number of nonzeros on each processor */
1290     procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
1291     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1292     for (i=0; i<size; i++) {
1293       for (j=rowners[i]; j< rowners[i+1]; j++) {
1294         procsnz[i] += rowlengths[j];
1295       }
1296     }
1297     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1298 
1299     /* determine max buffer needed and allocate it */
1300     maxnz = 0;
1301     for (i=0; i<size; i++) {
1302       maxnz = PetscMax(maxnz,procsnz[i]);
1303     }
1304     cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);
1305 
1306     /* read in my part of the matrix column indices  */
1307     nz = procsnz[0];
1308     mycols = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(mycols);
1309     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1310 
1311     /* read in everyone else's column indices and ship them off */
1312     for (i=1; i<size; i++) {
1313       nz   = procsnz[i];
1314       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1315       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1316     }
1317     ierr = PetscFree(cols);CHKERRQ(ierr);
1318   } else {
1319     /* determine buffer space needed for message */
1320     nz = 0;
1321     for (i=0; i<m; i++) {
1322       nz += ourlens[i];
1323     }
1324     mycols = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(mycols);
1325 
1326     /* receive message of column indices */
1327     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1328     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1329     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1330   }
1331 
1332   /* loop over local rows, determining number of off diagonal entries */
1333   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1334   jj = 0;
1335   for (i=0; i<m; i++) {
1336     for (j=0; j<ourlens[i]; j++) {
1337       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1338       jj++;
1339     }
1340   }
1341 
1342   /* create our matrix */
1343   for (i=0; i<m; i++) {
1344     ourlens[i] -= offlens[i];
1345   }
1346   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1347   A = *newmat;
1348   for (i=0; i<m; i++) {
1349     ourlens[i] += offlens[i];
1350   }
1351 
1352   if (!rank) {
1353     vals = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(vals);
1354 
1355     /* read in my part of the matrix numerical values  */
1356     nz = procsnz[0];
1357     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1358 
1359     /* insert into matrix */
1360     jj      = rstart;
1361     smycols = mycols;
1362     svals   = vals;
1363     for (i=0; i<m; i++) {
1364       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1365       smycols += ourlens[i];
1366       svals   += ourlens[i];
1367       jj++;
1368     }
1369 
1370     /* read in the other processors' values and ship them out */
1371     for (i=1; i<size; i++) {
1372       nz   = procsnz[i];
1373       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1374       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1375     }
1376     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1377   } else {
1378     /* receive numeric values */
1379     vals = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(vals);
1380 
1381     /* receive message of values */
1382     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1383     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1384     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1385 
1386     /* insert into matrix */
1387     jj      = rstart;
1388     smycols = mycols;
1389     svals   = vals;
1390     for (i=0; i<m; i++) {
1391       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1392       smycols += ourlens[i];
1393       svals   += ourlens[i];
1394       jj++;
1395     }
1396   }
1397   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1398   ierr = PetscFree(vals);CHKERRQ(ierr);
1399   ierr = PetscFree(mycols);CHKERRQ(ierr);
1400   ierr = PetscFree(rowners);CHKERRQ(ierr);
1401 
1402   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1403   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1404   PetscFunctionReturn(0);
1405 }
1406 EXTERN_C_END
1407 
1408 
1409 
1410 
1411 
1412