xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 7044107279799bb7fd2e4de09560a77cd45ddebe)
1 /*
2    Basic functions for parallel dense matrices.
3 */
4 
5 #include "src/mat/impls/dense/mpi/mpidense.h"
6 
7 EXTERN_C_BEGIN
8 #undef __FUNCT__
9 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
10 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
11 {
12   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
13   PetscErrorCode ierr;
14   int          m = A->m,rstart = mdn->rstart;
15   PetscScalar  *array;
16   MPI_Comm     comm;
17 
18   PetscFunctionBegin;
19   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
20 
21   /* The reuse aspect is not implemented efficiently */
22   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
23 
24   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
25   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
26   ierr = MatCreate(comm,m,m,m,m,B);CHKERRQ(ierr);
27   ierr = MatSetType(*B,mdn->A->type_name);CHKERRQ(ierr);
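  /* mdn->A is the local m x N dense block stored column-major with leading dimension m,
     so array + m*rstart points at the first entry of column rstart; the new SeqDense
     matrix therefore shares (rather than copies) the storage of the diagonal block. */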
28   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
29   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
30   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
31   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
32 
33   *iscopy = PETSC_TRUE;
34   PetscFunctionReturn(0);
35 }
36 EXTERN_C_END
37 
38 #undef __FUNCT__
39 #define __FUNCT__ "MatSetValues_MPIDense"
40 PetscErrorCode MatSetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],const PetscScalar v[],InsertMode addv)
41 {
42   Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
43   PetscErrorCode ierr;
44   int          i,j,rstart = A->rstart,rend = A->rend,row;
45   PetscTruth   roworiented = A->roworiented;
46 
47   PetscFunctionBegin;
48   for (i=0; i<m; i++) {
49     if (idxm[i] < 0) continue;
50     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
51     if (idxm[i] >= rstart && idxm[i] < rend) {
52       row = idxm[i] - rstart;
53       if (roworiented) {
54         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
55       } else {
56         for (j=0; j<n; j++) {
57           if (idxn[j] < 0) continue;
58           if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
59           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
60         }
61       }
62     } else {
63       if (!A->donotstash) {
64         if (roworiented) {
65           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
66         } else {
67           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
68         }
69       }
70     }
71   }
72   PetscFunctionReturn(0);
73 }
74 
75 #undef __FUNCT__
76 #define __FUNCT__ "MatGetValues_MPIDense"
77 PetscErrorCode MatGetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
78 {
79   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
80   PetscErrorCode ierr;
81   int          i,j,rstart = mdn->rstart,rend = mdn->rend,row;
82 
83   PetscFunctionBegin;
84   for (i=0; i<m; i++) {
85     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
86     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
87     if (idxm[i] >= rstart && idxm[i] < rend) {
88       row = idxm[i] - rstart;
89       for (j=0; j<n; j++) {
90         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
91         if (idxn[j] >= mat->N) {
92           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
93         }
94         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
95       }
96     } else {
97       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
98     }
99   }
100   PetscFunctionReturn(0);
101 }
102 
103 #undef __FUNCT__
104 #define __FUNCT__ "MatGetArray_MPIDense"
105 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
106 {
107   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
108   PetscErrorCode ierr;
109 
110   PetscFunctionBegin;
111   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
112   PetscFunctionReturn(0);
113 }
114 
115 #undef __FUNCT__
116 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
117 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
118 {
119   Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
120   Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
121   PetscErrorCode ierr;
122   int          i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
123   PetscScalar  *av,*bv,*v = lmat->v;
124   Mat          newmat;
125 
126   PetscFunctionBegin;
127   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
128   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
129   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
130   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
131 
132   /* No parallel redistribution currently supported! Should really check each index set
133      to confirm that it is OK.  Currently supports only submatrices with the same
134      partitioning as the original matrix! */
135 
136   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
137   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
138 
139   /* Check submatrix call */
140   if (scall == MAT_REUSE_MATRIX) {
141     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
142     /* Really need to test rows and column sizes! */
143     newmat = *B;
144   } else {
145     /* Create and fill new matrix */
146     ierr = MatCreate(A->comm,nrows,cs,PETSC_DECIDE,ncols,&newmat);CHKERRQ(ierr);
147     ierr = MatSetType(newmat,A->type_name);CHKERRQ(ierr);
148     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
149   }
150 
151   /* Now extract the data pointers and do the copy, column at a time */
152   newmatd = (Mat_MPIDense*)newmat->data;
153   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
154 
155   for (i=0; i<ncols; i++) {
156     av = v + nlrows*icol[i];
157     for (j=0; j<nrows; j++) {
158       *bv++ = av[irow[j] - rstart];
159     }
160   }
161 
162   /* Assemble the matrices so that the correct flags are set */
163   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
164   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
165 
166   /* Free work space */
167   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
168   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
169   *B = newmat;
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "MatRestoreArray_MPIDense"
175 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
176 {
177   PetscFunctionBegin;
178   PetscFunctionReturn(0);
179 }
180 
181 #undef __FUNCT__
182 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
183 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
184 {
185   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
186   MPI_Comm     comm = mat->comm;
187   PetscErrorCode ierr;
188   int          nstash,reallocs;
189   InsertMode   addv;
190 
191   PetscFunctionBegin;
192   /* make sure all processors are either in INSERTMODE or ADDMODE */
193   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
194   if (addv == (ADD_VALUES|INSERT_VALUES)) {
195     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
196   }
197   mat->insertmode = addv; /* in case this processor had no cache */
198 
199   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
200   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
201   PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
202   PetscFunctionReturn(0);
203 }
204 
205 #undef __FUNCT__
206 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
207 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
208 {
209   Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
210   PetscErrorCode ierr;
211   int          i,n,*row,*col,flg,j,rstart,ncols;
212   PetscScalar  *val;
213   InsertMode   addv=mat->insertmode;
214 
215   PetscFunctionBegin;
216   /*  wait on receives */
217   while (1) {
218     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
219     if (!flg) break;
220 
221     for (i=0; i<n;) {
222       /* Now identify the consecutive vals belonging to the same row */
223       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
224       if (j < n) ncols = j-i;
225       else       ncols = n-i;
226       /* Now assemble all these values with a single function call */
227       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
228       i = j;
229     }
230   }
231   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
232 
233   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
234   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
235 
236   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
237     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
238   }
239   PetscFunctionReturn(0);
240 }
241 
242 #undef __FUNCT__
243 #define __FUNCT__ "MatZeroEntries_MPIDense"
244 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
245 {
246   PetscErrorCode ierr;
247   Mat_MPIDense *l = (Mat_MPIDense*)A->data;
248 
249   PetscFunctionBegin;
250   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
251   PetscFunctionReturn(0);
252 }
253 
254 #undef __FUNCT__
255 #define __FUNCT__ "MatGetBlockSize_MPIDense"
256 PetscErrorCode MatGetBlockSize_MPIDense(Mat A,int *bs)
257 {
258   PetscFunctionBegin;
259   *bs = 1;
260   PetscFunctionReturn(0);
261 }
262 
263 /* This code does not handle the diagonal entries correctly unless the
264    matrix is square and the column and row ownerships are identical.
265    This is a BUG. The only way to fix it seems to be to access the
266    local matrix (mdn->A) directly rather than going through the
267    MatZeroRows() routine.
268 */
269 #undef __FUNCT__
270 #define __FUNCT__ "MatZeroRows_MPIDense"
271 PetscErrorCode MatZeroRows_MPIDense(Mat A,IS is,const PetscScalar *diag)
272 {
273   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
274   PetscErrorCode ierr;
275   int            i,N,*rows,*owners = l->rowners,size = l->size;
276   int            *nprocs,j,idx,nsends;
277   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
278   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
279   int            *lens,imdex,*lrows,*values;
280   MPI_Comm       comm = A->comm;
281   MPI_Request    *send_waits,*recv_waits;
282   MPI_Status     recv_status,*send_status;
283   IS             istmp;
284   PetscTruth     found;
285 
286   PetscFunctionBegin;
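  /* Overall scheme: each process determines which owner every requested global row
     belongs to, ships those row indices to their owners with nonblocking sends and
     receives, converts the indices it receives to local numbering, and then zeros
     the corresponding rows of its local dense block. */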
287   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
288   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
289 
290   /*  first count number of contributors to each processor */
291   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
292   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
293   ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
294   for (i=0; i<N; i++) {
295     idx = rows[i];
296     found = PETSC_FALSE;
297     for (j=0; j<size; j++) {
298       if (idx >= owners[j] && idx < owners[j+1]) {
299         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
300       }
301     }
302     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
303   }
304   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
305 
306   /* inform other processors of number of messages and max length*/
307   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
308 
309   /* post receives:   */
310   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
311   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
312   for (i=0; i<nrecvs; i++) {
313     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
314   }
315 
316   /* do sends:
317       1) starts[i] gives the starting index in svalues for stuff going to
318          the ith processor
319   */
320   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
321   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
322   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
323   starts[0]  = 0;
324   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
325   for (i=0; i<N; i++) {
326     svalues[starts[owner[i]]++] = rows[i];
327   }
328   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
329 
330   starts[0] = 0;
331   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
332   count = 0;
333   for (i=0; i<size; i++) {
334     if (nprocs[2*i+1]) {
335       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
336     }
337   }
338   ierr = PetscFree(starts);CHKERRQ(ierr);
339 
340   base = owners[rank];
341 
342   /*  wait on receives */
343   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
344   source = lens + nrecvs;
345   count  = nrecvs; slen = 0;
346   while (count) {
347     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
348     /* unpack receives into our local space */
349     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
350     source[imdex]  = recv_status.MPI_SOURCE;
351     lens[imdex]  = n;
352     slen += n;
353     count--;
354   }
355   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
356 
357   /* move the received global row indices into local numbering */
358   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
359   count = 0;
360   for (i=0; i<nrecvs; i++) {
361     values = rvalues + i*nmax;
362     for (j=0; j<lens[i]; j++) {
363       lrows[count++] = values[j] - base;
364     }
365   }
366   ierr = PetscFree(rvalues);CHKERRQ(ierr);
367   ierr = PetscFree(lens);CHKERRQ(ierr);
368   ierr = PetscFree(owner);CHKERRQ(ierr);
369   ierr = PetscFree(nprocs);CHKERRQ(ierr);
370 
371   /* actually zap the local rows */
372   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
373   PetscLogObjectParent(A,istmp);
374   ierr = PetscFree(lrows);CHKERRQ(ierr);
375   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
376   ierr = ISDestroy(istmp);CHKERRQ(ierr);
377 
378   /* wait on sends */
379   if (nsends) {
380     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
381     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
382     ierr = PetscFree(send_status);CHKERRQ(ierr);
383   }
384   ierr = PetscFree(send_waits);CHKERRQ(ierr);
385   ierr = PetscFree(svalues);CHKERRQ(ierr);
386 
387   PetscFunctionReturn(0);
388 }
389 
390 #undef __FUNCT__
391 #define __FUNCT__ "MatMult_MPIDense"
392 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
393 {
394   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
395   PetscErrorCode ierr;
396 
397   PetscFunctionBegin;
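  /* Every process needs the entire input vector: scatter xx into the local work vector
     lvec and then apply the local m x N dense block. */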
398   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
399   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
400   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
401   PetscFunctionReturn(0);
402 }
403 
404 #undef __FUNCT__
405 #define __FUNCT__ "MatMultAdd_MPIDense"
406 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
407 {
408   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
409   PetscErrorCode ierr;
410 
411   PetscFunctionBegin;
412   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
413   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
414   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
415   PetscFunctionReturn(0);
416 }
417 
418 #undef __FUNCT__
419 #define __FUNCT__ "MatMultTranspose_MPIDense"
420 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
421 {
422   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
423   PetscErrorCode ierr;
424   PetscScalar  zero = 0.0;
425 
426   PetscFunctionBegin;
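  /* The local transpose-multiply produces a full-length partial result in lvec; the
     reverse scatter with ADD_VALUES then sums these partial results into the
     distributed output vector yy. */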
427   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
428   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
429   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
430   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
431   PetscFunctionReturn(0);
432 }
433 
434 #undef __FUNCT__
435 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
436 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
437 {
438   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
439   PetscErrorCode ierr;
440 
441   PetscFunctionBegin;
442   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
443   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
444   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
445   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
446   PetscFunctionReturn(0);
447 }
448 
449 #undef __FUNCT__
450 #define __FUNCT__ "MatGetDiagonal_MPIDense"
451 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
452 {
453   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
454   Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
455   PetscErrorCode ierr;
456   int          len,i,n,m = A->m,radd;
457   PetscScalar  *x,zero = 0.0;
458 
459   PetscFunctionBegin;
460   ierr = VecSet(&zero,v);CHKERRQ(ierr);
461   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
462   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
463   if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
464   len  = PetscMin(a->A->m,a->A->n);
465   radd = a->rstart*m;
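  /* The local block is m x N in column-major order with leading dimension m: radd skips
     to the column where the global diagonal enters this process's rows, and i*m + i then
     walks down that diagonal. */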
466   for (i=0; i<len; i++) {
467     x[i] = aloc->v[radd + i*m + i];
468   }
469   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
470   PetscFunctionReturn(0);
471 }
472 
473 #undef __FUNCT__
474 #define __FUNCT__ "MatDestroy_MPIDense"
475 PetscErrorCode MatDestroy_MPIDense(Mat mat)
476 {
477   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
478   PetscErrorCode ierr;
479 
480   PetscFunctionBegin;
481 
482 #if defined(PETSC_USE_LOG)
483   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
484 #endif
485   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
486   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
487   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
488   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
489   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
490   if (mdn->factor) {
491     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
492     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
493     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
494     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
495   }
496   ierr = PetscFree(mdn);CHKERRQ(ierr);
497   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
498   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
499   PetscFunctionReturn(0);
500 }
501 
502 #undef __FUNCT__
503 #define __FUNCT__ "MatView_MPIDense_Binary"
504 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
505 {
506   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
507   PetscErrorCode ierr;
508 
509   PetscFunctionBegin;
510   if (mdn->size == 1) {
511     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
512   }
513   else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
514   PetscFunctionReturn(0);
515 }
516 
517 #undef __FUNCT__
518 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
519 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
520 {
521   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
522   PetscErrorCode    ierr;
523   PetscMPIInt       size = mdn->size,rank = mdn->rank;
524   PetscViewerType   vtype;
525   PetscTruth        iascii,isdraw;
526   PetscViewer       sviewer;
527   PetscViewerFormat format;
528 
529   PetscFunctionBegin;
530   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
531   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
532   if (iascii) {
533     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
534     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
535     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
536       MatInfo info;
537       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
538       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->m,
539                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
540       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
541       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
542       PetscFunctionReturn(0);
543     } else if (format == PETSC_VIEWER_ASCII_INFO) {
544       PetscFunctionReturn(0);
545     }
546   } else if (isdraw) {
547     PetscDraw       draw;
548     PetscTruth isnull;
549 
550     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
551     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
552     if (isnull) PetscFunctionReturn(0);
553   }
554 
555   if (size == 1) {
556     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
557   } else {
558     /* assemble the entire matrix onto first processor. */
559     Mat               A;
560     int               M = mat->M,N = mat->N,m,row,i,nz;
561     const int         *cols;
562     const PetscScalar *vals;
563 
564     if (!rank) {
565       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
566     } else {
567       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
568     }
569     /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
570     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
571     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
572     PetscLogObjectParent(mat,A);
573 
574     /* Copy the matrix ... This isn't the most efficient means,
575        but it's quick for now */
576     row = mdn->rstart; m = mdn->A->m;
577     for (i=0; i<m; i++) {
578       ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
579       ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
580       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
581       row++;
582     }
583 
584     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
585     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
586     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
587     if (!rank) {
588       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
589     }
590     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
591     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
592     ierr = MatDestroy(A);CHKERRQ(ierr);
593   }
594   PetscFunctionReturn(0);
595 }
596 
597 #undef __FUNCT__
598 #define __FUNCT__ "MatView_MPIDense"
599 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
600 {
601   PetscErrorCode ierr;
602   PetscTruth iascii,isbinary,isdraw,issocket;
603 
604   PetscFunctionBegin;
605 
606   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
607   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
608   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
609   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
610 
611   if (iascii || issocket || isdraw) {
612     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
613   } else if (isbinary) {
614     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
615   } else {
616     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
617   }
618   PetscFunctionReturn(0);
619 }
620 
621 #undef __FUNCT__
622 #define __FUNCT__ "MatGetInfo_MPIDense"
623 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
624 {
625   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
626   Mat          mdn = mat->A;
627   PetscErrorCode ierr;
628   PetscReal    isend[5],irecv[5];
629 
630   PetscFunctionBegin;
631   info->rows_global    = (double)A->M;
632   info->columns_global = (double)A->N;
633   info->rows_local     = (double)A->m;
634   info->columns_local  = (double)A->N;
635   info->block_size     = 1.0;
636   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
637   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
638   isend[3] = info->memory;  isend[4] = info->mallocs;
639   if (flag == MAT_LOCAL) {
640     info->nz_used      = isend[0];
641     info->nz_allocated = isend[1];
642     info->nz_unneeded  = isend[2];
643     info->memory       = isend[3];
644     info->mallocs      = isend[4];
645   } else if (flag == MAT_GLOBAL_MAX) {
646     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
647     info->nz_used      = irecv[0];
648     info->nz_allocated = irecv[1];
649     info->nz_unneeded  = irecv[2];
650     info->memory       = irecv[3];
651     info->mallocs      = irecv[4];
652   } else if (flag == MAT_GLOBAL_SUM) {
653     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
654     info->nz_used      = irecv[0];
655     info->nz_allocated = irecv[1];
656     info->nz_unneeded  = irecv[2];
657     info->memory       = irecv[3];
658     info->mallocs      = irecv[4];
659   }
660   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
661   info->fill_ratio_needed = 0;
662   info->factor_mallocs    = 0;
663   PetscFunctionReturn(0);
664 }
665 
666 #undef __FUNCT__
667 #define __FUNCT__ "MatSetOption_MPIDense"
668 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op)
669 {
670   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
671   PetscErrorCode ierr;
672 
673   PetscFunctionBegin;
674   switch (op) {
675   case MAT_NO_NEW_NONZERO_LOCATIONS:
676   case MAT_YES_NEW_NONZERO_LOCATIONS:
677   case MAT_NEW_NONZERO_LOCATION_ERR:
678   case MAT_NEW_NONZERO_ALLOCATION_ERR:
679   case MAT_COLUMNS_SORTED:
680   case MAT_COLUMNS_UNSORTED:
681     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
682     break;
683   case MAT_ROW_ORIENTED:
684     a->roworiented = PETSC_TRUE;
685     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
686     break;
687   case MAT_ROWS_SORTED:
688   case MAT_ROWS_UNSORTED:
689   case MAT_YES_NEW_DIAGONALS:
690   case MAT_USE_HASH_TABLE:
691     PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
692     break;
693   case MAT_COLUMN_ORIENTED:
694     a->roworiented = PETSC_FALSE;
695     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
696     break;
697   case MAT_IGNORE_OFF_PROC_ENTRIES:
698     a->donotstash = PETSC_TRUE;
699     break;
700   case MAT_NO_NEW_DIAGONALS:
701     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
702   case MAT_SYMMETRIC:
703   case MAT_STRUCTURALLY_SYMMETRIC:
704   case MAT_NOT_SYMMETRIC:
705   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
706   case MAT_HERMITIAN:
707   case MAT_NOT_HERMITIAN:
708   case MAT_SYMMETRY_ETERNAL:
709   case MAT_NOT_SYMMETRY_ETERNAL:
710     break;
711   default:
712     SETERRQ(PETSC_ERR_SUP,"unknown option");
713   }
714   PetscFunctionReturn(0);
715 }
716 
717 #undef __FUNCT__
718 #define __FUNCT__ "MatGetRow_MPIDense"
719 PetscErrorCode MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v)
720 {
721   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
722   PetscErrorCode ierr;
723   int          lrow,rstart = mat->rstart,rend = mat->rend;
724 
725   PetscFunctionBegin;
726   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows")
727   lrow = row - rstart;
728   ierr = MatGetRow(mat->A,lrow,nz,(const int **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
729   PetscFunctionReturn(0);
730 }
731 
732 #undef __FUNCT__
733 #define __FUNCT__ "MatRestoreRow_MPIDense"
734 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
735 {
736   PetscErrorCode ierr;
737 
738   PetscFunctionBegin;
739   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
740   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
741   PetscFunctionReturn(0);
742 }
743 
744 #undef __FUNCT__
745 #define __FUNCT__ "MatDiagonalScale_MPIDense"
746 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
747 {
748   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
749   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
750   PetscScalar  *l,*r,x,*v;
751   PetscErrorCode ierr;
752   int          i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;
753 
754   PetscFunctionBegin;
755   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
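  /* Left scaling uses the row-distributed vector ll directly; right scaling needs all
     of rr on every process, so rr is first scattered into the work vector lvec. */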
756   if (ll) {
757     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
758     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
759     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
760     for (i=0; i<m; i++) {
761       x = l[i];
762       v = mat->v + i;
763       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
764     }
765     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
766     PetscLogFlops(n*m);
767   }
768   if (rr) {
769     ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
770     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
771     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
772     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
773     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
774     for (i=0; i<n; i++) {
775       x = r[i];
776       v = mat->v + i*m;
777       for (j=0; j<m; j++) { (*v++) *= x;}
778     }
779     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
780     PetscLogFlops(n*m);
781   }
782   PetscFunctionReturn(0);
783 }
784 
785 #undef __FUNCT__
786 #define __FUNCT__ "MatNorm_MPIDense"
787 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
788 {
789   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
790   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
791   PetscErrorCode ierr;
792   int          i,j;
793   PetscReal    sum = 0.0;
794   PetscScalar  *v = mat->v;
795 
796   PetscFunctionBegin;
797   if (mdn->size == 1) {
798     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
799   } else {
800     if (type == NORM_FROBENIUS) {
801       for (i=0; i<mdn->A->n*mdn->A->m; i++) {
802 #if defined(PETSC_USE_COMPLEX)
803         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
804 #else
805         sum += (*v)*(*v); v++;
806 #endif
807       }
808       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
809       *nrm = sqrt(*nrm);
810       PetscLogFlops(2*mdn->A->n*mdn->A->m);
811     } else if (type == NORM_1) {
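      /* 1-norm = max column sum: each process accumulates the absolute column sums of its
         local rows into tmp, the sums are added across processes, and the maximum is taken. */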
812       PetscReal *tmp,*tmp2;
813       ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
814       tmp2 = tmp + A->N;
815       ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
816       *nrm = 0.0;
817       v = mat->v;
818       for (j=0; j<mdn->A->n; j++) {
819         for (i=0; i<mdn->A->m; i++) {
820           tmp[j] += PetscAbsScalar(*v);  v++;
821         }
822       }
823       ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
824       for (j=0; j<A->N; j++) {
825         if (tmp2[j] > *nrm) *nrm = tmp2[j];
826       }
827       ierr = PetscFree(tmp);CHKERRQ(ierr);
828       PetscLogFlops(A->n*A->m);
829     } else if (type == NORM_INFINITY) { /* max row norm */
830       PetscReal ntemp;
831       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
832       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
833     } else {
834       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
835     }
836   }
837   PetscFunctionReturn(0);
838 }
839 
840 #undef __FUNCT__
841 #define __FUNCT__ "MatTranspose_MPIDense"
842 PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout)
843 {
844   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
845   Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
846   Mat          B;
847   int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
848   PetscErrorCode ierr;
849   int          j,i;
850   PetscScalar  *v;
851 
852   PetscFunctionBegin;
853   if (!matout && M != N) {
854     SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
855   }
856   ierr = MatCreate(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,&B);CHKERRQ(ierr);
857   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
858   ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
859 
860   m = a->A->m; n = a->A->n; v = Aloc->v;
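  /* Column j of the local block holds rows rstart..rstart+m-1 of global column j;
     it is inserted as (part of) global row j of the transposed matrix B. */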
861   ierr = PetscMalloc(m*sizeof(int),&rwork);CHKERRQ(ierr);
862   for (i=0; i<m; i++) rwork[i] = rstart + i;
863   for (j=0; j<n; j++) {
864     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
865     v   += m;
866   }
867   ierr = PetscFree(rwork);CHKERRQ(ierr);
868   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
869   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
870   if (matout) {
871     *matout = B;
872   } else {
873     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
874   }
875   PetscFunctionReturn(0);
876 }
877 
878 #include "petscblaslapack.h"
879 #undef __FUNCT__
880 #define __FUNCT__ "MatScale_MPIDense"
881 PetscErrorCode MatScale_MPIDense(const PetscScalar *alpha,Mat inA)
882 {
883   Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
884   Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
885   PetscBLASInt one = 1,nz = (PetscBLASInt)inA->m*inA->N;
886 
887   PetscFunctionBegin;
888   BLscal_(&nz,(PetscScalar*)alpha,a->v,&one);
889   PetscLogFlops(nz);
890   PetscFunctionReturn(0);
891 }
892 
893 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
894 
895 #undef __FUNCT__
896 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
897 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
898 {
899   PetscErrorCode ierr;
900 
901   PetscFunctionBegin;
902   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
903   PetscFunctionReturn(0);
904 }
905 
906 /* -------------------------------------------------------------------*/
907 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
908        MatGetRow_MPIDense,
909        MatRestoreRow_MPIDense,
910        MatMult_MPIDense,
911 /* 4*/ MatMultAdd_MPIDense,
912        MatMultTranspose_MPIDense,
913        MatMultTransposeAdd_MPIDense,
914        0,
915        0,
916        0,
917 /*10*/ 0,
918        0,
919        0,
920        0,
921        MatTranspose_MPIDense,
922 /*15*/ MatGetInfo_MPIDense,
923        0,
924        MatGetDiagonal_MPIDense,
925        MatDiagonalScale_MPIDense,
926        MatNorm_MPIDense,
927 /*20*/ MatAssemblyBegin_MPIDense,
928        MatAssemblyEnd_MPIDense,
929        0,
930        MatSetOption_MPIDense,
931        MatZeroEntries_MPIDense,
932 /*25*/ MatZeroRows_MPIDense,
933        0,
934        0,
935        0,
936        0,
937 /*30*/ MatSetUpPreallocation_MPIDense,
938        0,
939        0,
940        MatGetArray_MPIDense,
941        MatRestoreArray_MPIDense,
942 /*35*/ MatDuplicate_MPIDense,
943        0,
944        0,
945        0,
946        0,
947 /*40*/ 0,
948        MatGetSubMatrices_MPIDense,
949        0,
950        MatGetValues_MPIDense,
951        0,
952 /*45*/ 0,
953        MatScale_MPIDense,
954        0,
955        0,
956        0,
957 /*50*/ MatGetBlockSize_MPIDense,
958        0,
959        0,
960        0,
961        0,
962 /*55*/ 0,
963        0,
964        0,
965        0,
966        0,
967 /*60*/ MatGetSubMatrix_MPIDense,
968        MatDestroy_MPIDense,
969        MatView_MPIDense,
970        MatGetPetscMaps_Petsc,
971        0,
972 /*65*/ 0,
973        0,
974        0,
975        0,
976        0,
977 /*70*/ 0,
978        0,
979        0,
980        0,
981        0,
982 /*75*/ 0,
983        0,
984        0,
985        0,
986        0,
987 /*80*/ 0,
988        0,
989        0,
990        0,
991 /*84*/ MatLoad_MPIDense,
992        0,
993        0,
994        0,
995        0,
996        0,
997 /*90*/ 0,
998        0,
999        0,
1000        0,
1001        0,
1002 /*95*/ 0,
1003        0,
1004        0,
1005        0};
1006 
1007 EXTERN_C_BEGIN
1008 #undef __FUNCT__
1009 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1010 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1011 {
1012   Mat_MPIDense *a;
1013   PetscErrorCode ierr;
1014 
1015   PetscFunctionBegin;
1016   mat->preallocated = PETSC_TRUE;
1017   /* Note: for now, when a data pointer is provided, this assumes the user has correctly
1018      allocated the local dense storage space.  We should add error checking. */
1019 
1020   a    = (Mat_MPIDense*)mat->data;
1021   ierr = MatCreate(PETSC_COMM_SELF,mat->m,mat->N,mat->m,mat->N,&a->A);CHKERRQ(ierr);
1022   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1023   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1024   PetscLogObjectParent(mat,a->A);
1025   PetscFunctionReturn(0);
1026 }
1027 EXTERN_C_END
1028 
1029 /*MC
1030    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1031 
1032    Options Database Keys:
1033 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1034 
1035   Level: beginner
1036 
1037 .seealso: MatCreateMPIDense
1038 M*/
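
/*
   A minimal usage sketch for MATMPIDENSE (the 100 x 100 size is arbitrary and error
   checking is omitted); MatCreateMPIDense() below packages the same steps:

      Mat A;
      MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,&A);
      MatSetType(A,MATMPIDENSE);
      MatMPIDenseSetPreallocation(A,PETSC_NULL);
*/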
1039 
1040 EXTERN_C_BEGIN
1041 #undef __FUNCT__
1042 #define __FUNCT__ "MatCreate_MPIDense"
1043 PetscErrorCode MatCreate_MPIDense(Mat mat)
1044 {
1045   Mat_MPIDense *a;
1046   PetscErrorCode ierr;
1047   int          i;
1048 
1049   PetscFunctionBegin;
1050   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1051   mat->data         = (void*)a;
1052   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1053   mat->factor       = 0;
1054   mat->mapping      = 0;
1055 
1056   a->factor       = 0;
1057   mat->insertmode = NOT_SET_VALUES;
1058   ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
1059   ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);
1060 
1061   ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
1062   ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
1063   a->nvec = mat->n;
1064 
1065   /* the information in the maps duplicates the information computed below; eventually
1066      we should remove the duplicate information that is not contained in the maps */
1067   ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
1068   ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);
1069 
1070   /* build local table of row and column ownerships */
1071   ierr       = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
1072   a->cowners = a->rowners + a->size + 1;
1073   PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1074   ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
1075   a->rowners[0] = 0;
1076   for (i=2; i<=a->size; i++) {
1077     a->rowners[i] += a->rowners[i-1];
1078   }
1079   a->rstart = a->rowners[a->rank];
1080   a->rend   = a->rowners[a->rank+1];
1081   ierr      = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
1082   a->cowners[0] = 0;
1083   for (i=2; i<=a->size; i++) {
1084     a->cowners[i] += a->cowners[i-1];
1085   }
1086 
1087   /* build cache for off array entries formed */
1088   a->donotstash = PETSC_FALSE;
1089   ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);
1090 
1091   /* stuff used for matrix vector multiply */
1092   a->lvec        = 0;
1093   a->Mvctx       = 0;
1094   a->roworiented = PETSC_TRUE;
1095 
1096   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1097                                      "MatGetDiagonalBlock_MPIDense",
1098                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1099   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1100                                      "MatMPIDenseSetPreallocation_MPIDense",
1101                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1102   PetscFunctionReturn(0);
1103 }
1104 EXTERN_C_END
1105 
1106 /*MC
1107    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1108 
1109    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1110    and MATMPIDENSE otherwise.
1111 
1112    Options Database Keys:
1113 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1114 
1115   Level: beginner
1116 
1117 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1118 M*/
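
/*
   A minimal sketch (sizes are placeholders) that lets the type be chosen at runtime,
   e.g. with -mat_type dense:

      Mat A;
      MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,&A);
      MatSetFromOptions(A);
*/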
1119 
1120 EXTERN_C_BEGIN
1121 #undef __FUNCT__
1122 #define __FUNCT__ "MatCreate_Dense"
1123 PetscErrorCode MatCreate_Dense(Mat A)
1124 {
1125   PetscErrorCode ierr;
1126   int size;
1127 
1128   PetscFunctionBegin;
1129   ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr);
1130   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1131   if (size == 1) {
1132     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1133   } else {
1134     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1135   }
1136   PetscFunctionReturn(0);
1137 }
1138 EXTERN_C_END
1139 
1140 #undef __FUNCT__
1141 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1142 /*@C
1143    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1144 
1145    Not collective
1146 
1147    Input Parameters:
1148 .  A - the matrix
1149 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1150    to control all matrix memory allocation.
1151 
1152    Notes:
1153    The dense format is fully compatible with standard Fortran 77
1154    storage by columns.
1155 
1156    The data input variable is intended primarily for Fortran programmers
1157    who wish to allocate their own matrix memory space.  Most users should
1158    set data=PETSC_NULL.
1159 
1160    Level: intermediate
1161 
1162 .keywords: matrix,dense, parallel
1163 
1164 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1165 @*/
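/*
   Example sketch of supplying user storage (the names mlocal, N, and comm are placeholders;
   the array must hold mlocal*N values in Fortran column-major order, as noted above):

      PetscScalar *data;
      Mat         A;
      ierr = PetscMalloc(mlocal*N*sizeof(PetscScalar),&data);CHKERRQ(ierr);
      ierr = MatCreate(comm,mlocal,PETSC_DECIDE,PETSC_DECIDE,N,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,data);CHKERRQ(ierr);
*/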
1166 PetscErrorCode MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1167 {
1168   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1169 
1170   PetscFunctionBegin;
1171   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1172   if (f) {
1173     ierr = (*f)(mat,data);CHKERRQ(ierr);
1174   }
1175   PetscFunctionReturn(0);
1176 }
1177 
1178 #undef __FUNCT__
1179 #define __FUNCT__ "MatCreateMPIDense"
1180 /*@C
1181    MatCreateMPIDense - Creates a parallel matrix in dense format.
1182 
1183    Collective on MPI_Comm
1184 
1185    Input Parameters:
1186 +  comm - MPI communicator
1187 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1188 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1189 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1190 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1191 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1192    to control all matrix memory allocation.
1193 
1194    Output Parameter:
1195 .  A - the matrix
1196 
1197    Notes:
1198    The dense format is fully compatible with standard Fortran 77
1199    storage by columns.
1200 
1201    The data input variable is intended primarily for Fortran programmers
1202    who wish to allocate their own matrix memory space.  Most users should
1203    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1204 
1205    The user MUST specify either the local or global matrix dimensions
1206    (possibly both).
1207 
1208    Level: intermediate
1209 
1210 .keywords: matrix,dense, parallel
1211 
1212 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1213 @*/
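/*
   Example sketch (the global size 100 x 100 is arbitrary): let PETSc choose the row
   partitioning and allocate the storage:

      Mat A;
      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,PETSC_NULL,&A);CHKERRQ(ierr);
*/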
1214 PetscErrorCode MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A)
1215 {
1216   PetscErrorCode ierr;
1217   int size;
1218 
1219   PetscFunctionBegin;
1220   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1221   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1222   if (size > 1) {
1223     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1224     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1225   } else {
1226     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1227     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1228   }
1229   PetscFunctionReturn(0);
1230 }
1231 
1232 #undef __FUNCT__
1233 #define __FUNCT__ "MatDuplicate_MPIDense"
1234 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1235 {
1236   Mat          mat;
1237   Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1238   PetscErrorCode ierr;
1239 
1240   PetscFunctionBegin;
1241   *newmat       = 0;
1242   ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
1243   ierr = MatSetType(mat,A->type_name);CHKERRQ(ierr);
1244   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1245   mat->data         = (void*)a;
1246   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1247   mat->factor       = A->factor;
1248   mat->assembled    = PETSC_TRUE;
1249   mat->preallocated = PETSC_TRUE;
1250 
1251   a->rstart       = oldmat->rstart;
1252   a->rend         = oldmat->rend;
1253   a->size         = oldmat->size;
1254   a->rank         = oldmat->rank;
1255   mat->insertmode = NOT_SET_VALUES;
1256   a->nvec         = oldmat->nvec;
1257   a->donotstash   = oldmat->donotstash;
1258   ierr            = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);CHKERRQ(ierr);
1259   PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1260   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
1261   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1262 
1263   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1264   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1265   PetscLogObjectParent(mat,a->A);
1266   *newmat = mat;
1267   PetscFunctionReturn(0);
1268 }
1269 
1270 #include "petscsys.h"
1271 
1272 #undef __FUNCT__
1273 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1274 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,const MatType type,Mat *newmat)
1275 {
1276   PetscErrorCode ierr;
1277   PetscMPIInt    rank,size;
1278   int            *rowners,i,m,nz,j;
1279   PetscScalar    *array,*vals,*vals_ptr;
1280   MPI_Status     status;
1281 
1282   PetscFunctionBegin;
1283   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1284   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1285 
1286   /* determine ownership of all rows */
1287   m          = M/size + ((M % size) > rank);
1288   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1289   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1290   rowners[0] = 0;
1291   for (i=2; i<=size; i++) {
1292     rowners[i] += rowners[i-1];
1293   }
1294 
1295   ierr = MatCreate(comm,m,PETSC_DECIDE,M,N,newmat);CHKERRQ(ierr);
1296   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1297   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1298   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1299 
1300   if (!rank) {
1301     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1302 
1303     /* read in my part of the matrix numerical values  */
1304     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1305 
1306     /* insert into the matrix row by row (the local array is column-major, which is why we cannot read the file directly into it) */
1307     vals_ptr = vals;
1308     for (i=0; i<m; i++) {
1309       for (j=0; j<N; j++) {
1310         array[i + j*m] = *vals_ptr++;
1311       }
1312     }
1313 
1314     /* read in the other processors' parts and ship them out */
1315     for (i=1; i<size; i++) {
1316       nz   = (rowners[i+1] - rowners[i])*N;
1317       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1318       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1319     }
1320   } else {
1321     /* receive numeric values */
1322     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1323 
1324     /* receive message of values*/
1325     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1326 
1327     /* insert into the matrix row by row (the local array is column-major, which is why we cannot read the file directly into it) */
1328     vals_ptr = vals;
1329     for (i=0; i<m; i++) {
1330       for (j=0; j<N; j++) {
1331         array[i + j*m] = *vals_ptr++;
1332       }
1333     }
1334   }
1335   ierr = PetscFree(rowners);CHKERRQ(ierr);
1336   ierr = PetscFree(vals);CHKERRQ(ierr);
1337   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1338   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1339   PetscFunctionReturn(0);
1340 }
1341 
1342 #undef __FUNCT__
1343 #define __FUNCT__ "MatLoad_MPIDense"
1344 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
1345 {
1346   Mat            A;
1347   PetscScalar    *vals,*svals;
1348   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1349   MPI_Status     status;
1350   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1351   int            header[4],*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1352   int            *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1353   int            i,nz,j,rstart,rend,fd;
1354   PetscErrorCode ierr;
1355 
1356   PetscFunctionBegin;
1357   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1358   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1359   if (!rank) {
1360     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1361     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1362     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1363   }
1364 
1365   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1366   M = header[1]; N = header[2]; nz = header[3];
1367 
1368   /*
1369        Handle case where matrix is stored on disk as a dense matrix
1370   */
1371   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1372     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1373     PetscFunctionReturn(0);
1374   }
1375 
1376   /* determine ownership of all rows */
1377   m          = M/size + ((M % size) > rank);
1378   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1379   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1380   rowners[0] = 0;
1381   for (i=2; i<=size; i++) {
1382     rowners[i] += rowners[i-1];
1383   }
1384   rstart = rowners[rank];
1385   rend   = rowners[rank+1];
1386 
1387   /* distribute row lengths to all processors */
1388   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1389   offlens = ourlens + (rend-rstart);
1390   if (!rank) {
1391     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1392     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1393     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1394     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1395     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1396     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1397   } else {
1398     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1399   }
1400 
1401   if (!rank) {
1402     /* calculate the number of nonzeros on each processor */
1403     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1404     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1405     for (i=0; i<size; i++) {
1406       for (j=rowners[i]; j< rowners[i+1]; j++) {
1407         procsnz[i] += rowlengths[j];
1408       }
1409     }
1410     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1411 
1412     /* determine max buffer needed and allocate it */
1413     maxnz = 0;
1414     for (i=0; i<size; i++) {
1415       maxnz = PetscMax(maxnz,procsnz[i]);
1416     }
1417     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1418 
1419     /* read in my part of the matrix column indices  */
1420     nz = procsnz[0];
1421     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1422     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1423 
1424     /* read in everyone else's part and ship it off */
1425     for (i=1; i<size; i++) {
1426       nz   = procsnz[i];
1427       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1428       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1429     }
1430     ierr = PetscFree(cols);CHKERRQ(ierr);
1431   } else {
1432     /* determine buffer space needed for message */
1433     nz = 0;
1434     for (i=0; i<m; i++) {
1435       nz += ourlens[i];
1436     }
1437     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1438 
1439     /* receive message of column indices*/
1440     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1441     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1442     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1443   }
1444 
1445   /* loop over local rows, determining number of off diagonal entries */
1446   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1447   jj = 0;
1448   for (i=0; i<m; i++) {
1449     for (j=0; j<ourlens[i]; j++) {
1450       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1451       jj++;
1452     }
1453   }
1454 
1455   /* create our matrix */
1456   for (i=0; i<m; i++) {
1457     ourlens[i] -= offlens[i];
1458   }
1459   ierr = MatCreate(comm,m,PETSC_DECIDE,M,N,newmat);CHKERRQ(ierr);
1460   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1461   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1462   A = *newmat;
1463   for (i=0; i<m; i++) {
1464     ourlens[i] += offlens[i];
1465   }
1466 
1467   if (!rank) {
1468     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1469 
1470     /* read in my part of the matrix numerical values  */
1471     nz = procsnz[0];
1472     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1473 
1474     /* insert into matrix */
1475     jj      = rstart;
1476     smycols = mycols;
1477     svals   = vals;
1478     for (i=0; i<m; i++) {
1479       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1480       smycols += ourlens[i];
1481       svals   += ourlens[i];
1482       jj++;
1483     }
1484 
1485     /* read in the other processors' parts and ship them out */
1486     for (i=1; i<size; i++) {
1487       nz   = procsnz[i];
1488       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1489       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1490     }
1491     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1492   } else {
1493     /* receive numeric values */
1494     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1495 
1496     /* receive message of values*/
1497     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1498     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1499     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1500 
1501     /* insert into matrix */
1502     jj      = rstart;
1503     smycols = mycols;
1504     svals   = vals;
1505     for (i=0; i<m; i++) {
1506       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1507       smycols += ourlens[i];
1508       svals   += ourlens[i];
1509       jj++;
1510     }
1511   }
1512   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1513   ierr = PetscFree(vals);CHKERRQ(ierr);
1514   ierr = PetscFree(mycols);CHKERRQ(ierr);
1515   ierr = PetscFree(rowners);CHKERRQ(ierr);
1516 
1517   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1518   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1519   PetscFunctionReturn(0);
1520 }
1521 
1522 
1523 
1524 
1525 
1526