xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 8a8e0b3aa6734b05cd88677becbdcc98d7dc465d)
1 /*
2    Basic functions for parallel dense matrices.
3 */
4 
5 #include "src/mat/impls/dense/mpi/mpidense.h"
6 
7 EXTERN_C_BEGIN
8 #undef __FUNCT__
9 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
10 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
11 {
12   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
13   PetscErrorCode ierr;
14   PetscInt          m = A->m,rstart = mdn->rstart;
15   PetscScalar  *array;
16   MPI_Comm     comm;
17 
18   PetscFunctionBegin;
19   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
20 
21   /* The reuse aspect is not implemented efficiently */
22   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
23 
24   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
25   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
26   ierr = MatCreate(comm,m,m,m,m,B);CHKERRQ(ierr);
27   ierr = MatSetType(*B,mdn->A->type_name);CHKERRQ(ierr);
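  /* mdn->A is the local m-by-N dense block stored column-major with leading dimension m,
     so the diagonal block (global columns rstart..rstart+m-1) starts at offset m*rstart;
     the new sequential matrix below is built directly on that piece of the array. */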
28   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
29   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
30   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
31   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
32 
33   *iscopy = PETSC_TRUE;
34   PetscFunctionReturn(0);
35 }
36 EXTERN_C_END
37 
38 #undef __FUNCT__
39 #define __FUNCT__ "MatSetValues_MPIDense"
40 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
41 {
42   Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
43   PetscErrorCode ierr;
44   PetscInt          i,j,rstart = A->rstart,rend = A->rend,row;
45   PetscTruth   roworiented = A->roworiented;
46 
47   PetscFunctionBegin;
48   for (i=0; i<m; i++) {
49     if (idxm[i] < 0) continue;
50     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
51     if (idxm[i] >= rstart && idxm[i] < rend) {
52       row = idxm[i] - rstart;
53       if (roworiented) {
54         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
55       } else {
56         for (j=0; j<n; j++) {
57           if (idxn[j] < 0) continue;
58           if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
59           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
60         }
61       }
62     } else {
63       if (!A->donotstash) {
64         if (roworiented) {
65           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
66         } else {
67           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
68         }
69       }
70     }
71   }
72   PetscFunctionReturn(0);
73 }
74 
75 #undef __FUNCT__
76 #define __FUNCT__ "MatGetValues_MPIDense"
77 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
78 {
79   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
80   PetscErrorCode ierr;
81   PetscInt          i,j,rstart = mdn->rstart,rend = mdn->rend,row;
82 
83   PetscFunctionBegin;
84   for (i=0; i<m; i++) {
85     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
86     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
87     if (idxm[i] >= rstart && idxm[i] < rend) {
88       row = idxm[i] - rstart;
89       for (j=0; j<n; j++) {
90         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
91         if (idxn[j] >= mat->N) {
92           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
93         }
94         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
95       }
96     } else {
97       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
98     }
99   }
100   PetscFunctionReturn(0);
101 }
102 
103 #undef __FUNCT__
104 #define __FUNCT__ "MatGetArray_MPIDense"
105 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
106 {
107   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
108   PetscErrorCode ierr;
109 
110   PetscFunctionBegin;
111   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
112   PetscFunctionReturn(0);
113 }
114 
115 #undef __FUNCT__
116 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
117 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
118 {
119   Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
120   Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
121   PetscErrorCode ierr;
122   PetscInt          i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
123   PetscScalar  *av,*bv,*v = lmat->v;
124   Mat          newmat;
125 
126   PetscFunctionBegin;
127   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
128   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
129   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
130   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
131 
132   /* No parallel redistribution is currently supported! We should really check each index set
133      to confirm that it is OK.  ... Currently only submatrices with the same row partitioning
134      as the original matrix are supported! */
135 
136   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
137   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
138 
139   /* Check submatrix call */
140   if (scall == MAT_REUSE_MATRIX) {
141     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
142     /* Really need to test rows and column sizes! */
143     newmat = *B;
144   } else {
145     /* Create and fill new matrix */
146     ierr = MatCreate(A->comm,nrows,cs,PETSC_DECIDE,ncols,&newmat);CHKERRQ(ierr);
147     ierr = MatSetType(newmat,A->type_name);CHKERRQ(ierr);
148     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
149   }
150 
151   /* Now extract the data pointers and do the copy, column at a time */
152   newmatd = (Mat_MPIDense*)newmat->data;
153   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
154 
155   for (i=0; i<ncols; i++) {
156     av = v + nlrows*icol[i];
157     for (j=0; j<nrows; j++) {
158       *bv++ = av[irow[j] - rstart];
159     }
160   }
161 
162   /* Assemble the matrices so that the correct flags are set */
163   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
164   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
165 
166   /* Free work space */
167   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
168   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
169   *B = newmat;
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "MatRestoreArray_MPIDense"
175 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
176 {
177   PetscFunctionBegin;
178   PetscFunctionReturn(0);
179 }
180 
181 #undef __FUNCT__
182 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
183 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
184 {
185   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
186   MPI_Comm     comm = mat->comm;
187   PetscErrorCode ierr;
188   PetscInt          nstash,reallocs;
189   InsertMode   addv;
190 
191   PetscFunctionBegin;
192   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
193   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
194   if (addv == (ADD_VALUES|INSERT_VALUES)) {
195     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
196   }
197   mat->insertmode = addv; /* in case this processor had no cache */
198 
199   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
200   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
201   PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
202   PetscFunctionReturn(0);
203 }
204 
205 #undef __FUNCT__
206 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
207 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
208 {
209   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
210   PetscErrorCode  ierr;
211   PetscInt        i,*row,*col,flg,j,rstart,ncols;
212   PetscMPIInt     n;
213   PetscScalar     *val;
214   InsertMode      addv=mat->insertmode;
215 
216   PetscFunctionBegin;
217   /*  wait on receives */
218   while (1) {
219     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
220     if (!flg) break;
221 
222     for (i=0; i<n;) {
223       /* Now identify the consecutive vals belonging to the same row */
224       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
225       if (j < n) ncols = j-i;
226       else       ncols = n-i;
227       /* Now assemble all these values with a single function call */
228       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
229       i = j;
230     }
231   }
232   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
233 
234   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
235   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
236 
237   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
238     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
239   }
240   PetscFunctionReturn(0);
241 }
242 
243 #undef __FUNCT__
244 #define __FUNCT__ "MatZeroEntries_MPIDense"
245 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
246 {
247   PetscErrorCode ierr;
248   Mat_MPIDense *l = (Mat_MPIDense*)A->data;
249 
250   PetscFunctionBegin;
251   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
252   PetscFunctionReturn(0);
253 }
254 
255 /* the code does not do the diagonal entries correctly unless the
256    matrix is square and the column and row ownerships are identical.
257    This is a BUG. The only way to fix it seems to be to access
258    mdn->A and mdn->B directly and not through the MatZeroRows()
259    routine.
260 */
261 #undef __FUNCT__
262 #define __FUNCT__ "MatZeroRows_MPIDense"
263 PetscErrorCode MatZeroRows_MPIDense(Mat A,IS is,const PetscScalar *diag)
264 {
265   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
266   PetscErrorCode ierr;
267   PetscInt       i,N,*rows,*owners = l->rowners;
268   PetscInt       *nprocs,j,idx,nsends;
269   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
270   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source;
271   PetscInt       *lens,*lrows,*values;
272   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
273   MPI_Comm       comm = A->comm;
274   MPI_Request    *send_waits,*recv_waits;
275   MPI_Status     recv_status,*send_status;
276   IS             istmp;
277   PetscTruth     found;
278 
279   PetscFunctionBegin;
280   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
281   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
282 
283   /*  first count number of contributors to each processor */
284   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
285   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
286   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
287   for (i=0; i<N; i++) {
288     idx = rows[i];
289     found = PETSC_FALSE;
290     for (j=0; j<size; j++) {
291       if (idx >= owners[j] && idx < owners[j+1]) {
292         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
293       }
294     }
295     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
296   }
297   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
298 
299   /* inform other processors of number of messages and max length*/
300   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
301 
302   /* post receives:   */
303   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
304   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
305   for (i=0; i<nrecvs; i++) {
306     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
307   }
308 
309   /* do sends:
310       1) starts[i] gives the starting index in svalues for stuff going to
311          the ith processor
312   */
313   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
314   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
315   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
316   starts[0]  = 0;
317   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
318   for (i=0; i<N; i++) {
319     svalues[starts[owner[i]]++] = rows[i];
320   }
321   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
322 
323   starts[0] = 0;
324   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
325   count = 0;
326   for (i=0; i<size; i++) {
327     if (nprocs[2*i+1]) {
328       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
329     }
330   }
331   ierr = PetscFree(starts);CHKERRQ(ierr);
332 
333   base = owners[rank];
334 
335   /*  wait on receives */
336   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
337   source = lens + nrecvs;
338   count  = nrecvs; slen = 0;
339   while (count) {
340     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
341     /* unpack receives into our local space */
342     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
343     source[imdex]  = recv_status.MPI_SOURCE;
344     lens[imdex]    = n;
345     slen += n;
346     count--;
347   }
348   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
349 
350   /* move the data into the send scatter */
351   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
352   count = 0;
353   for (i=0; i<nrecvs; i++) {
354     values = rvalues + i*nmax;
355     for (j=0; j<lens[i]; j++) {
356       lrows[count++] = values[j] - base;
357     }
358   }
359   ierr = PetscFree(rvalues);CHKERRQ(ierr);
360   ierr = PetscFree(lens);CHKERRQ(ierr);
361   ierr = PetscFree(owner);CHKERRQ(ierr);
362   ierr = PetscFree(nprocs);CHKERRQ(ierr);
363 
364   /* actually zap the local rows */
365   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
366   PetscLogObjectParent(A,istmp);
367   ierr = PetscFree(lrows);CHKERRQ(ierr);
368   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
369   ierr = ISDestroy(istmp);CHKERRQ(ierr);
370 
371   /* wait on sends */
372   if (nsends) {
373     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
374     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
375     ierr = PetscFree(send_status);CHKERRQ(ierr);
376   }
377   ierr = PetscFree(send_waits);CHKERRQ(ierr);
378   ierr = PetscFree(svalues);CHKERRQ(ierr);
379 
380   PetscFunctionReturn(0);
381 }
382 
383 #undef __FUNCT__
384 #define __FUNCT__ "MatMult_MPIDense"
385 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
386 {
387   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
388   PetscErrorCode ierr;
389 
390   PetscFunctionBegin;
391   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
392   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
393   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
394   PetscFunctionReturn(0);
395 }
396 
397 #undef __FUNCT__
398 #define __FUNCT__ "MatMultAdd_MPIDense"
399 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
400 {
401   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
402   PetscErrorCode ierr;
403 
404   PetscFunctionBegin;
405   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
406   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
407   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
408   PetscFunctionReturn(0);
409 }
410 
411 #undef __FUNCT__
412 #define __FUNCT__ "MatMultTranspose_MPIDense"
413 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
414 {
415   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
416   PetscErrorCode ierr;
417   PetscScalar  zero = 0.0;
418 
419   PetscFunctionBegin;
420   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
421   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
422   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
423   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
424   PetscFunctionReturn(0);
425 }
426 
427 #undef __FUNCT__
428 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
429 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
430 {
431   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
432   PetscErrorCode ierr;
433 
434   PetscFunctionBegin;
435   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
436   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
437   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
438   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
439   PetscFunctionReturn(0);
440 }
441 
442 #undef __FUNCT__
443 #define __FUNCT__ "MatGetDiagonal_MPIDense"
444 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
445 {
446   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
447   Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
448   PetscErrorCode ierr;
449   PetscInt          len,i,n,m = A->m,radd;
450   PetscScalar  *x,zero = 0.0;
451 
452   PetscFunctionBegin;
453   ierr = VecSet(&zero,v);CHKERRQ(ierr);
454   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
455   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
456   if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
457   len  = PetscMin(a->A->m,a->A->n);
458   radd = a->rstart*m;
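  /* aloc->v holds the local m-by-N block in column-major order, so the diagonal entry of
     local row i lives in global column rstart+i, i.e. at index radd + i*m + i below. */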
459   for (i=0; i<len; i++) {
460     x[i] = aloc->v[radd + i*m + i];
461   }
462   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
463   PetscFunctionReturn(0);
464 }
465 
466 #undef __FUNCT__
467 #define __FUNCT__ "MatDestroy_MPIDense"
468 PetscErrorCode MatDestroy_MPIDense(Mat mat)
469 {
470   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
471   PetscErrorCode ierr;
472 
473   PetscFunctionBegin;
474 
475 #if defined(PETSC_USE_LOG)
476   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
477 #endif
478   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
479   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
480   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
481   if (mdn->lvec)   VecDestroy(mdn->lvec);
482   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
483   if (mdn->factor) {
484     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
485     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
486     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
487     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
488   }
489   ierr = PetscFree(mdn);CHKERRQ(ierr);
490   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
491   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
492   PetscFunctionReturn(0);
493 }
494 
495 #undef __FUNCT__
496 #define __FUNCT__ "MatView_MPIDense_Binary"
497 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
498 {
499   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
500   PetscErrorCode ierr;
501 
502   PetscFunctionBegin;
503   if (mdn->size == 1) {
504     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
505   }
506   else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
507   PetscFunctionReturn(0);
508 }
509 
510 #undef __FUNCT__
511 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
512 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
513 {
514   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
515   PetscErrorCode    ierr;
516   PetscMPIInt       size = mdn->size,rank = mdn->rank;
517   PetscViewerType   vtype;
518   PetscTruth        iascii,isdraw;
519   PetscViewer       sviewer;
520   PetscViewerFormat format;
521 
522   PetscFunctionBegin;
523   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
524   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
525   if (iascii) {
526     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
527     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
528     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
529       MatInfo info;
530       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
531       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->m,
532                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
533       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
534       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
535       PetscFunctionReturn(0);
536     } else if (format == PETSC_VIEWER_ASCII_INFO) {
537       PetscFunctionReturn(0);
538     }
539   } else if (isdraw) {
540     PetscDraw       draw;
541     PetscTruth isnull;
542 
543     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
544     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
545     if (isnull) PetscFunctionReturn(0);
546   }
547 
548   if (size == 1) {
549     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
550   } else {
551     /* assemble the entire matrix onto the first processor. */
552     Mat               A;
553     PetscInt               M = mat->M,N = mat->N,m,row,i,nz;
554     const PetscInt         *cols;
555     const PetscScalar *vals;
556 
557     if (!rank) {
558       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
559     } else {
560       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
561     }
562     /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
563     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
564     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
565     PetscLogObjectParent(mat,A);
566 
567     /* Copy the matrix ... This isn't the most efficient means,
568        but it's quick for now */
569     row = mdn->rstart; m = mdn->A->m;
570     for (i=0; i<m; i++) {
571       ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
572       ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
573       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
574       row++;
575     }
576 
577     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
578     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
579     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
580     if (!rank) {
581       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
582     }
583     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
584     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
585     ierr = MatDestroy(A);CHKERRQ(ierr);
586   }
587   PetscFunctionReturn(0);
588 }
589 
590 #undef __FUNCT__
591 #define __FUNCT__ "MatView_MPIDense"
592 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
593 {
594   PetscErrorCode ierr;
595   PetscTruth iascii,isbinary,isdraw,issocket;
596 
597   PetscFunctionBegin;
598 
599   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
600   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
601   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
602   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
603 
604   if (iascii || issocket || isdraw) {
605     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
606   } else if (isbinary) {
607     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
608   } else {
609     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
610   }
611   PetscFunctionReturn(0);
612 }
613 
614 #undef __FUNCT__
615 #define __FUNCT__ "MatGetInfo_MPIDense"
616 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
617 {
618   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
619   Mat          mdn = mat->A;
620   PetscErrorCode ierr;
621   PetscReal    isend[5],irecv[5];
622 
623   PetscFunctionBegin;
624   info->rows_global    = (double)A->M;
625   info->columns_global = (double)A->N;
626   info->rows_local     = (double)A->m;
627   info->columns_local  = (double)A->N;
628   info->block_size     = 1.0;
629   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
630   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
631   isend[3] = info->memory;  isend[4] = info->mallocs;
632   if (flag == MAT_LOCAL) {
633     info->nz_used      = isend[0];
634     info->nz_allocated = isend[1];
635     info->nz_unneeded  = isend[2];
636     info->memory       = isend[3];
637     info->mallocs      = isend[4];
638   } else if (flag == MAT_GLOBAL_MAX) {
639     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
640     info->nz_used      = irecv[0];
641     info->nz_allocated = irecv[1];
642     info->nz_unneeded  = irecv[2];
643     info->memory       = irecv[3];
644     info->mallocs      = irecv[4];
645   } else if (flag == MAT_GLOBAL_SUM) {
646     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
647     info->nz_used      = irecv[0];
648     info->nz_allocated = irecv[1];
649     info->nz_unneeded  = irecv[2];
650     info->memory       = irecv[3];
651     info->mallocs      = irecv[4];
652   }
653   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
654   info->fill_ratio_needed = 0;
655   info->factor_mallocs    = 0;
656   PetscFunctionReturn(0);
657 }
658 
659 #undef __FUNCT__
660 #define __FUNCT__ "MatSetOption_MPIDense"
661 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op)
662 {
663   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
664   PetscErrorCode ierr;
665 
666   PetscFunctionBegin;
667   switch (op) {
668   case MAT_NO_NEW_NONZERO_LOCATIONS:
669   case MAT_YES_NEW_NONZERO_LOCATIONS:
670   case MAT_NEW_NONZERO_LOCATION_ERR:
671   case MAT_NEW_NONZERO_ALLOCATION_ERR:
672   case MAT_COLUMNS_SORTED:
673   case MAT_COLUMNS_UNSORTED:
674     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
675     break;
676   case MAT_ROW_ORIENTED:
677     a->roworiented = PETSC_TRUE;
678     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
679     break;
680   case MAT_ROWS_SORTED:
681   case MAT_ROWS_UNSORTED:
682   case MAT_YES_NEW_DIAGONALS:
683   case MAT_USE_HASH_TABLE:
684     PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
685     break;
686   case MAT_COLUMN_ORIENTED:
687     a->roworiented = PETSC_FALSE;
688     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
689     break;
690   case MAT_IGNORE_OFF_PROC_ENTRIES:
691     a->donotstash = PETSC_TRUE;
692     break;
693   case MAT_NO_NEW_DIAGONALS:
694     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
695   case MAT_SYMMETRIC:
696   case MAT_STRUCTURALLY_SYMMETRIC:
697   case MAT_NOT_SYMMETRIC:
698   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
699   case MAT_HERMITIAN:
700   case MAT_NOT_HERMITIAN:
701   case MAT_SYMMETRY_ETERNAL:
702   case MAT_NOT_SYMMETRY_ETERNAL:
703     break;
704   default:
705     SETERRQ(PETSC_ERR_SUP,"unknown option");
706   }
707   PetscFunctionReturn(0);
708 }
709 
710 #undef __FUNCT__
711 #define __FUNCT__ "MatGetRow_MPIDense"
712 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
713 {
714   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
715   PetscErrorCode ierr;
716   PetscInt          lrow,rstart = mat->rstart,rend = mat->rend;
717 
718   PetscFunctionBegin;
719   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
720   lrow = row - rstart;
721   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
722   PetscFunctionReturn(0);
723 }
724 
725 #undef __FUNCT__
726 #define __FUNCT__ "MatRestoreRow_MPIDense"
727 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
728 {
729   PetscErrorCode ierr;
730 
731   PetscFunctionBegin;
732   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
733   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
734   PetscFunctionReturn(0);
735 }
736 
737 #undef __FUNCT__
738 #define __FUNCT__ "MatDiagonalScale_MPIDense"
739 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
740 {
741   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
742   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
743   PetscScalar  *l,*r,x,*v;
744   PetscErrorCode ierr;
745   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;
746 
747   PetscFunctionBegin;
748   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
749   if (ll) {
750     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
751     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
752     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
753     for (i=0; i<m; i++) {
754       x = l[i];
755       v = mat->v + i;
756       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
757     }
758     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
759     PetscLogFlops(n*m);
760   }
761   if (rr) {
762     ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
763     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector non-conforming local size");
764     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
765     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
766     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
767     for (i=0; i<n; i++) {
768       x = r[i];
769       v = mat->v + i*m;
770       for (j=0; j<m; j++) { (*v++) *= x;}
771     }
772     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
773     PetscLogFlops(n*m);
774   }
775   PetscFunctionReturn(0);
776 }
777 
778 #undef __FUNCT__
779 #define __FUNCT__ "MatNorm_MPIDense"
780 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
781 {
782   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
783   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
784   PetscErrorCode ierr;
785   PetscInt          i,j;
786   PetscReal    sum = 0.0;
787   PetscScalar  *v = mat->v;
788 
789   PetscFunctionBegin;
790   if (mdn->size == 1) {
791     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
792   } else {
793     if (type == NORM_FROBENIUS) {
794       for (i=0; i<mdn->A->n*mdn->A->m; i++) {
795 #if defined(PETSC_USE_COMPLEX)
796         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
797 #else
798         sum += (*v)*(*v); v++;
799 #endif
800       }
801       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
802       *nrm = sqrt(*nrm);
803       PetscLogFlops(2*mdn->A->n*mdn->A->m);
804     } else if (type == NORM_1) {
805       PetscReal *tmp,*tmp2;
806       ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
807       tmp2 = tmp + A->N;
808       ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
809       *nrm = 0.0;
810       v = mat->v;
811       for (j=0; j<mdn->A->n; j++) {
812         for (i=0; i<mdn->A->m; i++) {
813           tmp[j] += PetscAbsScalar(*v);  v++;
814         }
815       }
816       ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
817       for (j=0; j<A->N; j++) {
818         if (tmp2[j] > *nrm) *nrm = tmp2[j];
819       }
820       ierr = PetscFree(tmp);CHKERRQ(ierr);
821       PetscLogFlops(A->n*A->m);
822     } else if (type == NORM_INFINITY) { /* max row norm */
823       PetscReal ntemp;
824       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
825       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
826     } else {
827       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
828     }
829   }
830   PetscFunctionReturn(0);
831 }
832 
833 #undef __FUNCT__
834 #define __FUNCT__ "MatTranspose_MPIDense"
835 PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout)
836 {
837   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
838   Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
839   Mat          B;
840   PetscInt          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
841   PetscErrorCode ierr;
842   PetscInt          j,i;
843   PetscScalar  *v;
844 
845   PetscFunctionBegin;
846   if (!matout && M != N) {
847     SETERRQ(PETSC_ERR_SUP,"In-place transpose supported only for square matrices");
848   }
849   ierr = MatCreate(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,&B);CHKERRQ(ierr);
850   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
851   ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
852 
853   m = a->A->m; n = a->A->n; v = Aloc->v;
854   ierr = PetscMalloc(n*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
855   for (j=0; j<n; j++) {
856     for (i=0; i<m; i++) rwork[i] = rstart + i;
857     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
858     v   += m;
859   }
860   ierr = PetscFree(rwork);CHKERRQ(ierr);
861   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
862   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
863   if (matout) {
864     *matout = B;
865   } else {
866     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
867   }
868   PetscFunctionReturn(0);
869 }
870 
871 #include "petscblaslapack.h"
872 #undef __FUNCT__
873 #define __FUNCT__ "MatScale_MPIDense"
874 PetscErrorCode MatScale_MPIDense(const PetscScalar *alpha,Mat inA)
875 {
876   Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
877   Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
878   PetscBLASInt one = 1,nz = (PetscBLASInt)inA->m*inA->N;
879 
880   PetscFunctionBegin;
881   BLscal_(&nz,(PetscScalar*)alpha,a->v,&one);
882   PetscLogFlops(nz);
883   PetscFunctionReturn(0);
884 }
885 
886 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
887 
888 #undef __FUNCT__
889 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
890 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
891 {
892   PetscErrorCode ierr;
893 
894   PetscFunctionBegin;
895   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
896   PetscFunctionReturn(0);
897 }
898 
899 /* -------------------------------------------------------------------*/
900 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
901        MatGetRow_MPIDense,
902        MatRestoreRow_MPIDense,
903        MatMult_MPIDense,
904 /* 4*/ MatMultAdd_MPIDense,
905        MatMultTranspose_MPIDense,
906        MatMultTransposeAdd_MPIDense,
907        0,
908        0,
909        0,
910 /*10*/ 0,
911        0,
912        0,
913        0,
914        MatTranspose_MPIDense,
915 /*15*/ MatGetInfo_MPIDense,
916        0,
917        MatGetDiagonal_MPIDense,
918        MatDiagonalScale_MPIDense,
919        MatNorm_MPIDense,
920 /*20*/ MatAssemblyBegin_MPIDense,
921        MatAssemblyEnd_MPIDense,
922        0,
923        MatSetOption_MPIDense,
924        MatZeroEntries_MPIDense,
925 /*25*/ MatZeroRows_MPIDense,
926        0,
927        0,
928        0,
929        0,
930 /*30*/ MatSetUpPreallocation_MPIDense,
931        0,
932        0,
933        MatGetArray_MPIDense,
934        MatRestoreArray_MPIDense,
935 /*35*/ MatDuplicate_MPIDense,
936        0,
937        0,
938        0,
939        0,
940 /*40*/ 0,
941        MatGetSubMatrices_MPIDense,
942        0,
943        MatGetValues_MPIDense,
944        0,
945 /*45*/ 0,
946        MatScale_MPIDense,
947        0,
948        0,
949        0,
950 /*50*/ 0,
951        0,
952        0,
953        0,
954        0,
955 /*55*/ 0,
956        0,
957        0,
958        0,
959        0,
960 /*60*/ MatGetSubMatrix_MPIDense,
961        MatDestroy_MPIDense,
962        MatView_MPIDense,
963        MatGetPetscMaps_Petsc,
964        0,
965 /*65*/ 0,
966        0,
967        0,
968        0,
969        0,
970 /*70*/ 0,
971        0,
972        0,
973        0,
974        0,
975 /*75*/ 0,
976        0,
977        0,
978        0,
979        0,
980 /*80*/ 0,
981        0,
982        0,
983        0,
984 /*84*/ MatLoad_MPIDense,
985        0,
986        0,
987        0,
988        0,
989        0,
990 /*90*/ 0,
991        0,
992        0,
993        0,
994        0,
995 /*95*/ 0,
996        0,
997        0,
998        0};
999 
1000 EXTERN_C_BEGIN
1001 #undef __FUNCT__
1002 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1003 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1004 {
1005   Mat_MPIDense *a;
1006   PetscErrorCode ierr;
1007 
1008   PetscFunctionBegin;
1009   mat->preallocated = PETSC_TRUE;
1010   /* Note: For now, when data is provided, we assume the user has correctly
1011      allocated the local dense storage space.  We should add error checking. */
1012 
1013   a    = (Mat_MPIDense*)mat->data;
1014   ierr = MatCreate(PETSC_COMM_SELF,mat->m,mat->N,mat->m,mat->N,&a->A);CHKERRQ(ierr);
1015   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1016   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1017   PetscLogObjectParent(mat,a->A);
1018   PetscFunctionReturn(0);
1019 }
1020 EXTERN_C_END
1021 
1022 /*MC
1023    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1024 
1025    Options Database Keys:
1026 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1027 
1028   Level: beginner
1029 
1030 .seealso: MatCreateMPIDense
1031 M*/
1032 
1033 EXTERN_C_BEGIN
1034 #undef __FUNCT__
1035 #define __FUNCT__ "MatCreate_MPIDense"
1036 PetscErrorCode MatCreate_MPIDense(Mat mat)
1037 {
1038   Mat_MPIDense *a;
1039   PetscErrorCode ierr;
1040   PetscInt          i;
1041 
1042   PetscFunctionBegin;
1043   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1044   mat->data         = (void*)a;
1045   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1046   mat->factor       = 0;
1047   mat->mapping      = 0;
1048 
1049   a->factor       = 0;
1050   mat->insertmode = NOT_SET_VALUES;
1051   ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
1052   ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);
1053 
1054   ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
1055   ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
1056   a->nvec = mat->n;
1057 
1058   /* the information in the maps duplicates the information computed below; eventually
1059      we should remove the duplicated information that is not contained in the maps */
1060   ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
1061   ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);
1062 
1063   /* build local table of row and column ownerships */
1064   ierr       = PetscMalloc(2*(a->size+2)*sizeof(PetscInt),&a->rowners);CHKERRQ(ierr);
1065   a->cowners = a->rowners + a->size + 1;
1066   PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1067   ierr = MPI_Allgather(&mat->m,1,MPIU_INT,a->rowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr);
1068   a->rowners[0] = 0;
1069   for (i=2; i<=a->size; i++) {
1070     a->rowners[i] += a->rowners[i-1];
1071   }
1072   a->rstart = a->rowners[a->rank];
1073   a->rend   = a->rowners[a->rank+1];
1074   ierr      = MPI_Allgather(&mat->n,1,MPIU_INT,a->cowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr);
1075   a->cowners[0] = 0;
1076   for (i=2; i<=a->size; i++) {
1077     a->cowners[i] += a->cowners[i-1];
1078   }
1079 
1080   /* build a cache (stash) for entries destined for other processes */
1081   a->donotstash = PETSC_FALSE;
1082   ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);
1083 
1084   /* stuff used for matrix vector multiply */
1085   a->lvec        = 0;
1086   a->Mvctx       = 0;
1087   a->roworiented = PETSC_TRUE;
1088 
1089   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1090                                      "MatGetDiagonalBlock_MPIDense",
1091                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1092   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1093                                      "MatMPIDenseSetPreallocation_MPIDense",
1094                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1095   PetscFunctionReturn(0);
1096 }
1097 EXTERN_C_END
1098 
1099 /*MC
1100    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1101 
1102    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1103    and MATMPIDENSE otherwise.
1104 
1105    Options Database Keys:
1106 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1107 
1108   Level: beginner
1109 
1110 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1111 M*/
1112 
1113 EXTERN_C_BEGIN
1114 #undef __FUNCT__
1115 #define __FUNCT__ "MatCreate_Dense"
1116 PetscErrorCode MatCreate_Dense(Mat A)
1117 {
1118   PetscErrorCode ierr;
1119   PetscMPIInt    size;
1120 
1121   PetscFunctionBegin;
1122   ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr);
1123   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1124   if (size == 1) {
1125     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1126   } else {
1127     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1128   }
1129   PetscFunctionReturn(0);
1130 }
1131 EXTERN_C_END
1132 
1133 #undef __FUNCT__
1134 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1135 /*@C
1136    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1137 
1138    Not collective
1139 
1140    Input Parameters:
1141 +  A - the matrix
1142 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1143    to control all matrix memory allocation.
1144 
1145    Notes:
1146    The dense format is fully compatible with standard Fortran 77
1147    storage by columns.
1148 
1149    The data input variable is intended primarily for Fortran programmers
1150    who wish to allocate their own matrix memory space.  Most users should
1151    set data=PETSC_NULL.
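
   A typical call sequence looks like the following (a minimal sketch with generic sizes;
   error checking via CHKERRQ() omitted):
      MatCreate(comm,m,n,M,N,&A);
      MatSetType(A,MATMPIDENSE);
      MatMPIDenseSetPreallocation(A,PETSC_NULL);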
1152 
1153    Level: intermediate
1154 
1155 .keywords: matrix,dense, parallel
1156 
1157 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1158 @*/
1159 PetscErrorCode MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1160 {
1161   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1162 
1163   PetscFunctionBegin;
1164   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1165   if (f) {
1166     ierr = (*f)(mat,data);CHKERRQ(ierr);
1167   }
1168   PetscFunctionReturn(0);
1169 }
1170 
1171 #undef __FUNCT__
1172 #define __FUNCT__ "MatCreateMPIDense"
1173 /*@C
1174    MatCreateMPIDense - Creates a parallel matrix in dense format.
1175 
1176    Collective on MPI_Comm
1177 
1178    Input Parameters:
1179 +  comm - MPI communicator
1180 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1181 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1182 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1183 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1184 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1185    to control all matrix memory allocation.
1186 
1187    Output Parameter:
1188 .  A - the matrix
1189 
1190    Notes:
1191    The dense format is fully compatible with standard Fortran 77
1192    storage by columns.
1193 
1194    The data input variable is intended primarily for Fortran programmers
1195    who wish to allocate their own matrix memory space.  Most users should
1196    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1197 
1198    The user MUST specify either the local or global matrix dimensions
1199    (possibly both).
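
   For example, the following is a minimal sketch that builds an M x N parallel dense matrix,
   letting PETSc choose the local sizes and manage the storage (i, j, v are generic index and
   value variables; error checking via CHKERRQ() omitted):
      MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);
      MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);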
1200 
1201    Level: intermediate
1202 
1203 .keywords: matrix,dense, parallel
1204 
1205 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1206 @*/
1207 PetscErrorCode MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1208 {
1209   PetscErrorCode ierr;
1210   PetscMPIInt    size;
1211 
1212   PetscFunctionBegin;
1213   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1214   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1215   if (size > 1) {
1216     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1217     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1218   } else {
1219     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1220     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1221   }
1222   PetscFunctionReturn(0);
1223 }
1224 
1225 #undef __FUNCT__
1226 #define __FUNCT__ "MatDuplicate_MPIDense"
1227 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1228 {
1229   Mat          mat;
1230   Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1231   PetscErrorCode ierr;
1232 
1233   PetscFunctionBegin;
1234   *newmat       = 0;
1235   ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
1236   ierr = MatSetType(mat,A->type_name);CHKERRQ(ierr);
1237   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1238   mat->data         = (void*)a;
1239   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1240   mat->factor       = A->factor;
1241   mat->assembled    = PETSC_TRUE;
1242   mat->preallocated = PETSC_TRUE;
1243 
1244   a->rstart       = oldmat->rstart;
1245   a->rend         = oldmat->rend;
1246   a->size         = oldmat->size;
1247   a->rank         = oldmat->rank;
1248   mat->insertmode = NOT_SET_VALUES;
1249   a->nvec         = oldmat->nvec;
1250   a->donotstash   = oldmat->donotstash;
1251   ierr            = PetscMalloc((a->size+1)*sizeof(PetscInt),&a->rowners);CHKERRQ(ierr);
1252   PetscLogObjectMemory(mat,(a->size+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1253   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1254   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1255 
1256   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1257   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1258   PetscLogObjectParent(mat,a->A);
1259   *newmat = mat;
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 #include "petscsys.h"
1264 
1265 #undef __FUNCT__
1266 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1267 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,const MatType type,Mat *newmat)
1268 {
1269   PetscErrorCode ierr;
1270   PetscMPIInt    rank,size;
1271   PetscInt            *rowners,i,m,nz,j;
1272   PetscScalar    *array,*vals,*vals_ptr;
1273   MPI_Status     status;
1274 
1275   PetscFunctionBegin;
1276   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1277   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1278 
1279   /* determine ownership of all rows */
1280   m          = M/size + ((M % size) > rank);
1281   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1282   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1283   rowners[0] = 0;
1284   for (i=2; i<=size; i++) {
1285     rowners[i] += rowners[i-1];
1286   }
1287 
1288   ierr = MatCreate(comm,m,PETSC_DECIDE,M,N,newmat);CHKERRQ(ierr);
1289   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1290   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1291   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1292 
1293   if (!rank) {
1294     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1295 
1296     /* read in my part of the matrix numerical values  */
1297     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1298 
1299     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1300     vals_ptr = vals;
1301     for (i=0; i<m; i++) {
1302       for (j=0; j<N; j++) {
1303         array[i + j*m] = *vals_ptr++;
1304       }
1305     }
1306 
1307     /* read in the other processors' parts and ship them out */
1308     for (i=1; i<size; i++) {
1309       nz   = (rowners[i+1] - rowners[i])*N;
1310       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1311       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1312     }
1313   } else {
1314     /* receive numeric values */
1315     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1316 
1317     /* receive message of values*/
1318     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1319 
1320     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1321     vals_ptr = vals;
1322     for (i=0; i<m; i++) {
1323       for (j=0; j<N; j++) {
1324         array[i + j*m] = *vals_ptr++;
1325       }
1326     }
1327   }
1328   ierr = PetscFree(rowners);CHKERRQ(ierr);
1329   ierr = PetscFree(vals);CHKERRQ(ierr);
1330   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1331   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1332   PetscFunctionReturn(0);
1333 }
1334 
1335 #undef __FUNCT__
1336 #define __FUNCT__ "MatLoad_MPIDense"
1337 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
1338 {
1339   Mat            A;
1340   PetscScalar    *vals,*svals;
1341   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1342   MPI_Status     status;
1343   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1344   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1345   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1346   PetscInt       i,nz,j,rstart,rend;
1347   int            fd;
1348   PetscErrorCode ierr;
1349 
1350   PetscFunctionBegin;
1351   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1352   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1353   if (!rank) {
1354     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1355     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1356     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1357   }
1358 
1359   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1360   M = header[1]; N = header[2]; nz = header[3];
1361 
1362   /*
1363        Handle case where matrix is stored on disk as a dense matrix
1364   */
1365   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1366     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1367     PetscFunctionReturn(0);
1368   }
1369 
1370   /* determine ownership of all rows */
1371   m          = M/size + ((M % size) > rank);
1372   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1373   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1374   rowners[0] = 0;
1375   for (i=2; i<=size; i++) {
1376     rowners[i] += rowners[i-1];
1377   }
1378   rstart = rowners[rank];
1379   rend   = rowners[rank+1];
1380 
1381   /* distribute row lengths to all processors */
1382   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
1383   offlens = ourlens + (rend-rstart);
1384   if (!rank) {
1385     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1386     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1387     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1388     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1389     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1390     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1391   } else {
1392     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1393   }
1394 
1395   if (!rank) {
1396     /* calculate the number of nonzeros on each processor */
1397     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1398     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1399     for (i=0; i<size; i++) {
1400       for (j=rowners[i]; j< rowners[i+1]; j++) {
1401         procsnz[i] += rowlengths[j];
1402       }
1403     }
1404     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1405 
1406     /* determine max buffer needed and allocate it */
1407     maxnz = 0;
1408     for (i=0; i<size; i++) {
1409       maxnz = PetscMax(maxnz,procsnz[i]);
1410     }
1411     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1412 
1413     /* read in my part of the matrix column indices  */
1414     nz = procsnz[0];
1415     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1416     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1417 
1418     /* read in everyone else's and ship them off */
1419     for (i=1; i<size; i++) {
1420       nz   = procsnz[i];
1421       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1422       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1423     }
1424     ierr = PetscFree(cols);CHKERRQ(ierr);
1425   } else {
1426     /* determine buffer space needed for message */
1427     nz = 0;
1428     for (i=0; i<m; i++) {
1429       nz += ourlens[i];
1430     }
1431     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1432 
1433     /* receive message of column indices*/
1434     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1435     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1436     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1437   }
1438 
1439   /* loop over local rows, determining number of off diagonal entries */
1440   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1441   jj = 0;
1442   for (i=0; i<m; i++) {
1443     for (j=0; j<ourlens[i]; j++) {
1444       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1445       jj++;
1446     }
1447   }
1448 
1449   /* create our matrix */
1450   for (i=0; i<m; i++) {
1451     ourlens[i] -= offlens[i];
1452   }
1453   ierr = MatCreate(comm,m,PETSC_DECIDE,M,N,newmat);CHKERRQ(ierr);
1454   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1455   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1456   A = *newmat;
1457   for (i=0; i<m; i++) {
1458     ourlens[i] += offlens[i];
1459   }
1460 
1461   if (!rank) {
1462     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1463 
1464     /* read in my part of the matrix numerical values  */
1465     nz = procsnz[0];
1466     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1467 
1468     /* insert into matrix */
1469     jj      = rstart;
1470     smycols = mycols;
1471     svals   = vals;
1472     for (i=0; i<m; i++) {
1473       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1474       smycols += ourlens[i];
1475       svals   += ourlens[i];
1476       jj++;
1477     }
1478 
1479     /* read in other processors and ship out */
1480     for (i=1; i<size; i++) {
1481       nz   = procsnz[i];
1482       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1483       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1484     }
1485     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1486   } else {
1487     /* receive numeric values */
1488     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1489 
1490     /* receive message of values*/
1491     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1492     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1493     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1494 
1495     /* insert into matrix */
1496     jj      = rstart;
1497     smycols = mycols;
1498     svals   = vals;
1499     for (i=0; i<m; i++) {
1500       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1501       smycols += ourlens[i];
1502       svals   += ourlens[i];
1503       jj++;
1504     }
1505   }
1506   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1507   ierr = PetscFree(vals);CHKERRQ(ierr);
1508   ierr = PetscFree(mycols);CHKERRQ(ierr);
1509   ierr = PetscFree(rowners);CHKERRQ(ierr);
1510 
1511   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1512   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1513   PetscFunctionReturn(0);
1514 }
1515 
1516 
1517 
1518 
1519 
1520