xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision d07a9264daf19612ed0b9951a5a70bfd0f76d6cd)
1 /*
2    Basic functions for parallel dense matrices.
3 */
4 
5 #include "src/mat/impls/dense/mpi/mpidense.h"
6 
7 EXTERN_C_BEGIN
8 #undef __FUNCT__
9 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
10 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
11 {
12   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
13   PetscErrorCode ierr;
14   int          m = A->m,rstart = mdn->rstart;
15   PetscScalar  *array;
16   MPI_Comm     comm;
17 
18   PetscFunctionBegin;
19   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
20 
21   /* The reuse aspect is not implemented efficiently */
22   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
23 
24   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
25   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
26   ierr = MatCreate(comm,m,m,m,m,B);CHKERRQ(ierr);
27   ierr = MatSetType(*B,mdn->A->type_name);CHKERRQ(ierr);
28   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
29   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
30   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
31   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
32 
33   *iscopy = PETSC_TRUE;
34   PetscFunctionReturn(0);
35 }
36 EXTERN_C_END
37 
38 #undef __FUNCT__
39 #define __FUNCT__ "MatSetValues_MPIDense"
40 PetscErrorCode MatSetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],const PetscScalar v[],InsertMode addv)
41 {
42   Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
43   PetscErrorCode ierr;
44   int          i,j,rstart = A->rstart,rend = A->rend,row;
45   PetscTruth   roworiented = A->roworiented;
46 
47   PetscFunctionBegin;
48   for (i=0; i<m; i++) {
49     if (idxm[i] < 0) continue;
50     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
51     if (idxm[i] >= rstart && idxm[i] < rend) {
52       row = idxm[i] - rstart;
53       if (roworiented) {
54         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
55       } else {
56         for (j=0; j<n; j++) {
57           if (idxn[j] < 0) continue;
58           if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
59           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
60         }
61       }
62     } else {
63       if (!A->donotstash) {
64         if (roworiented) {
65           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
66         } else {
67           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
68         }
69       }
70     }
71   }
72   PetscFunctionReturn(0);
73 }
74 
75 #undef __FUNCT__
76 #define __FUNCT__ "MatGetValues_MPIDense"
77 PetscErrorCode MatGetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
78 {
79   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
80   PetscErrorCode ierr;
81   int          i,j,rstart = mdn->rstart,rend = mdn->rend,row;
82 
83   PetscFunctionBegin;
84   for (i=0; i<m; i++) {
85     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
86     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
87     if (idxm[i] >= rstart && idxm[i] < rend) {
88       row = idxm[i] - rstart;
89       for (j=0; j<n; j++) {
90         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
91         if (idxn[j] >= mat->N) {
92           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
93         }
94         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
95       }
96     } else {
97       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
98     }
99   }
100   PetscFunctionReturn(0);
101 }
102 
103 #undef __FUNCT__
104 #define __FUNCT__ "MatGetArray_MPIDense"
105 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
106 {
107   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
108   PetscErrorCode ierr;
109 
110   PetscFunctionBegin;
111   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
112   PetscFunctionReturn(0);
113 }
114 
115 #undef __FUNCT__
116 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
117 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
118 {
119   Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
120   Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
121   PetscErrorCode ierr;
122   int          i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
123   PetscScalar  *av,*bv,*v = lmat->v;
124   Mat          newmat;
125 
126   PetscFunctionBegin;
127   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
128   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
129   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
130   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
131 
132   /* No parallel redistribution is currently supported! We should really check each index set
133      to confirm that it is OK. Currently only submatrices with the same row partitioning as the
134      original matrix are supported. */
135 
136   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
137   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
138 
139   /* Check submatrix call */
140   if (scall == MAT_REUSE_MATRIX) {
141     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
142     /* Really need to test rows and column sizes! */
143     newmat = *B;
144   } else {
145     /* Create and fill new matrix */
146     ierr = MatCreate(A->comm,nrows,cs,PETSC_DECIDE,ncols,&newmat);CHKERRQ(ierr);
147     ierr = MatSetType(newmat,A->type_name);CHKERRQ(ierr);
148     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
149   }
150 
151   /* Now extract the data pointers and do the copy, column at a time */
152   newmatd = (Mat_MPIDense*)newmat->data;
153   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
154 
155   for (i=0; i<ncols; i++) {
156     av = v + nlrows*icol[i];
157     for (j=0; j<nrows; j++) {
158       *bv++ = av[irow[j] - rstart];
159     }
160   }
161 
162   /* Assemble the matrices so that the correct flags are set */
163   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
164   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
165 
166   /* Free work space */
167   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
168   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
169   *B = newmat;
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "MatRestoreArray_MPIDense"
175 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
176 {
177   PetscFunctionBegin;
178   PetscFunctionReturn(0);
179 }
180 
181 #undef __FUNCT__
182 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
183 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
184 {
185   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
186   MPI_Comm     comm = mat->comm;
187   PetscErrorCode ierr;
188   int          nstash,reallocs;
189   InsertMode   addv;
190 
191   PetscFunctionBegin;
192   /* make sure all processors are either in INSERTMODE or ADDMODE */
193   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
194   if (addv == (ADD_VALUES|INSERT_VALUES)) {
195     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
196   }
197   mat->insertmode = addv; /* in case this processor had no cache */
198 
199   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
200   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
201   PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
202   PetscFunctionReturn(0);
203 }
204 
205 #undef __FUNCT__
206 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
207 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
208 {
209   Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
210   PetscErrorCode ierr;
211   int          i,n,*row,*col,flg,j,rstart,ncols;
212   PetscScalar  *val;
213   InsertMode   addv=mat->insertmode;
214 
215   PetscFunctionBegin;
216   /*  wait on receives */
217   while (1) {
218     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
219     if (!flg) break;
220 
221     for (i=0; i<n;) {
222       /* Now identify the consecutive vals belonging to the same row */
223       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
224       if (j < n) ncols = j-i;
225       else       ncols = n-i;
226       /* Now assemble all these values with a single function call */
227       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
228       i = j;
229     }
230   }
231   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
232 
233   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
234   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
235 
236   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
237     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
238   }
239   PetscFunctionReturn(0);
240 }
241 
242 #undef __FUNCT__
243 #define __FUNCT__ "MatZeroEntries_MPIDense"
244 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
245 {
246   PetscErrorCode ierr;
247   Mat_MPIDense *l = (Mat_MPIDense*)A->data;
248 
249   PetscFunctionBegin;
250   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
251   PetscFunctionReturn(0);
252 }
253 
254 /* the code does not do the diagonal entries correctly unless the
255    matrix is square and the column and row ownerships are identical.
256    This is a BUG. The only way to fix it seems to be to access
257    mdn->A and mdn->B directly and not through the MatZeroRows()
258    routine.
259 */
260 #undef __FUNCT__
261 #define __FUNCT__ "MatZeroRows_MPIDense"
262 PetscErrorCode MatZeroRows_MPIDense(Mat A,IS is,const PetscScalar *diag)
263 {
264   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
265   PetscErrorCode ierr;
266   int            i,N,*rows,*owners = l->rowners,size = l->size;
267   int            *nprocs,j,idx,nsends;
268   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
269   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
270   int            *lens,imdex,*lrows,*values;
271   MPI_Comm       comm = A->comm;
272   MPI_Request    *send_waits,*recv_waits;
273   MPI_Status     recv_status,*send_status;
274   IS             istmp;
275   PetscTruth     found;
276 
277   PetscFunctionBegin;
278   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
279   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
280 
281   /*  first count number of contributors to each processor */
282   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
283   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
284   ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
285   for (i=0; i<N; i++) {
286     idx = rows[i];
287     found = PETSC_FALSE;
288     for (j=0; j<size; j++) {
289       if (idx >= owners[j] && idx < owners[j+1]) {
290         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
291       }
292     }
293     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
294   }
295   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
296 
297   /* inform other processors of number of messages and max length*/
298   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
299 
300   /* post receives:   */
301   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
302   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
303   for (i=0; i<nrecvs; i++) {
304     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
305   }
306 
307   /* do sends:
308       1) starts[i] gives the starting index in svalues for stuff going to
309          the ith processor
310   */
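  /* Illustration (not part of the original source): with 3 processes and per-process counts
     nprocs[2*i] = {2,0,3}, starts becomes {0,2,2}, so svalues packs the two row indices destined
     for process 0 followed by the three destined for process 2; a send is posted only to the
     processes whose nprocs[2*i+1] flag is set. */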
311   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
312   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
313   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
314   starts[0]  = 0;
315   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
316   for (i=0; i<N; i++) {
317     svalues[starts[owner[i]]++] = rows[i];
318   }
319   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
320 
321   starts[0] = 0;
322   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
323   count = 0;
324   for (i=0; i<size; i++) {
325     if (nprocs[2*i+1]) {
326       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
327     }
328   }
329   ierr = PetscFree(starts);CHKERRQ(ierr);
330 
331   base = owners[rank];
332 
333   /*  wait on receives */
334   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
335   source = lens + nrecvs;
336   count  = nrecvs; slen = 0;
337   while (count) {
338     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
339     /* unpack receives into our local space */
340     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
341     source[imdex]  = recv_status.MPI_SOURCE;
342     lens[imdex]  = n;
343     slen += n;
344     count--;
345   }
346   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
347 
348   /* move the data into the send scatter */
349   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
350   count = 0;
351   for (i=0; i<nrecvs; i++) {
352     values = rvalues + i*nmax;
353     for (j=0; j<lens[i]; j++) {
354       lrows[count++] = values[j] - base;
355     }
356   }
357   ierr = PetscFree(rvalues);CHKERRQ(ierr);
358   ierr = PetscFree(lens);CHKERRQ(ierr);
359   ierr = PetscFree(owner);CHKERRQ(ierr);
360   ierr = PetscFree(nprocs);CHKERRQ(ierr);
361 
362   /* actually zap the local rows */
363   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
364   PetscLogObjectParent(A,istmp);
365   ierr = PetscFree(lrows);CHKERRQ(ierr);
366   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
367   ierr = ISDestroy(istmp);CHKERRQ(ierr);
368 
369   /* wait on sends */
370   if (nsends) {
371     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
372     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
373     ierr = PetscFree(send_status);CHKERRQ(ierr);
374   }
375   ierr = PetscFree(send_waits);CHKERRQ(ierr);
376   ierr = PetscFree(svalues);CHKERRQ(ierr);
377 
378   PetscFunctionReturn(0);
379 }
380 
381 #undef __FUNCT__
382 #define __FUNCT__ "MatMult_MPIDense"
383 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
384 {
385   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
386   PetscErrorCode ierr;
387 
388   PetscFunctionBegin;
389   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
390   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
391   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
392   PetscFunctionReturn(0);
393 }
394 
395 #undef __FUNCT__
396 #define __FUNCT__ "MatMultAdd_MPIDense"
397 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
398 {
399   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
400   PetscErrorCode ierr;
401 
402   PetscFunctionBegin;
403   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
404   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
405   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
406   PetscFunctionReturn(0);
407 }
408 
409 #undef __FUNCT__
410 #define __FUNCT__ "MatMultTranspose_MPIDense"
411 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
412 {
413   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
414   PetscErrorCode ierr;
415   PetscScalar  zero = 0.0;
416 
417   PetscFunctionBegin;
418   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
419   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
420   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
421   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
422   PetscFunctionReturn(0);
423 }
424 
425 #undef __FUNCT__
426 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
427 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
428 {
429   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
430   PetscErrorCode ierr;
431 
432   PetscFunctionBegin;
433   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
434   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
435   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
436   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
437   PetscFunctionReturn(0);
438 }
439 
440 #undef __FUNCT__
441 #define __FUNCT__ "MatGetDiagonal_MPIDense"
442 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
443 {
444   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
445   Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
446   PetscErrorCode ierr;
447   int          len,i,n,m = A->m,radd;
448   PetscScalar  *x,zero = 0.0;
449 
450   PetscFunctionBegin;
451   ierr = VecSet(&zero,v);CHKERRQ(ierr);
452   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
453   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
454   if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
455   len  = PetscMin(a->A->m,a->A->n);
456   radd = a->rstart*m;
457   for (i=0; i<len; i++) {
458     x[i] = aloc->v[radd + i*m + i];
459   }
460   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
461   PetscFunctionReturn(0);
462 }
463 
464 #undef __FUNCT__
465 #define __FUNCT__ "MatDestroy_MPIDense"
466 PetscErrorCode MatDestroy_MPIDense(Mat mat)
467 {
468   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
469   PetscErrorCode ierr;
470 
471   PetscFunctionBegin;
472 
473 #if defined(PETSC_USE_LOG)
474   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
475 #endif
476   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
477   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
478   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
479   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
480   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
481   if (mdn->factor) {
482     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
483     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
484     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
485     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
486   }
487   ierr = PetscFree(mdn);CHKERRQ(ierr);
488   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
489   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
490   PetscFunctionReturn(0);
491 }
492 
493 #undef __FUNCT__
494 #define __FUNCT__ "MatView_MPIDense_Binary"
495 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
496 {
497   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
498   PetscErrorCode ierr;
499 
500   PetscFunctionBegin;
501   if (mdn->size == 1) {
502     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
503   }
504   else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
505   PetscFunctionReturn(0);
506 }
507 
508 #undef __FUNCT__
509 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
510 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
511 {
512   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
513   PetscErrorCode    ierr;
514   PetscMPIInt       size = mdn->size,rank = mdn->rank;
515   PetscViewerType   vtype;
516   PetscTruth        iascii,isdraw;
517   PetscViewer       sviewer;
518   PetscViewerFormat format;
519 
520   PetscFunctionBegin;
521   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
522   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
523   if (iascii) {
524     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
525     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
526     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
527       MatInfo info;
528       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
529       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->m,
530                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
531       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
532       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
533       PetscFunctionReturn(0);
534     } else if (format == PETSC_VIEWER_ASCII_INFO) {
535       PetscFunctionReturn(0);
536     }
537   } else if (isdraw) {
538     PetscDraw       draw;
539     PetscTruth isnull;
540 
541     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
542     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
543     if (isnull) PetscFunctionReturn(0);
544   }
545 
546   if (size == 1) {
547     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
548   } else {
549     /* assemble the entire matrix onto first processor. */
550     Mat               A;
551     int               M = mat->M,N = mat->N,m,row,i,nz;
552     const int         *cols;
553     const PetscScalar *vals;
554 
555     if (!rank) {
556       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
557     } else {
558       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
559     }
560     /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
561     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
562     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
563     PetscLogObjectParent(mat,A);
564 
565     /* Copy the matrix ... This isn't the most efficient means,
566        but it's quick for now */
567     row = mdn->rstart; m = mdn->A->m;
568     for (i=0; i<m; i++) {
569       ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
570       ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
571       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
572       row++;
573     }
574 
575     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
576     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
577     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
578     if (!rank) {
579       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
580     }
581     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
582     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
583     ierr = MatDestroy(A);CHKERRQ(ierr);
584   }
585   PetscFunctionReturn(0);
586 }
587 
588 #undef __FUNCT__
589 #define __FUNCT__ "MatView_MPIDense"
590 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
591 {
592   PetscErrorCode ierr;
593   PetscTruth iascii,isbinary,isdraw,issocket;
594 
595   PetscFunctionBegin;
596 
597   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
598   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
599   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
600   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
601 
602   if (iascii || issocket || isdraw) {
603     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
604   } else if (isbinary) {
605     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
606   } else {
607     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
608   }
609   PetscFunctionReturn(0);
610 }
611 
612 #undef __FUNCT__
613 #define __FUNCT__ "MatGetInfo_MPIDense"
614 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
615 {
616   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
617   Mat          mdn = mat->A;
618   PetscErrorCode ierr;
619   PetscReal    isend[5],irecv[5];
620 
621   PetscFunctionBegin;
622   info->rows_global    = (double)A->M;
623   info->columns_global = (double)A->N;
624   info->rows_local     = (double)A->m;
625   info->columns_local  = (double)A->N;
626   info->block_size     = 1.0;
627   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
628   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
629   isend[3] = info->memory;  isend[4] = info->mallocs;
630   if (flag == MAT_LOCAL) {
631     info->nz_used      = isend[0];
632     info->nz_allocated = isend[1];
633     info->nz_unneeded  = isend[2];
634     info->memory       = isend[3];
635     info->mallocs      = isend[4];
636   } else if (flag == MAT_GLOBAL_MAX) {
637     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
638     info->nz_used      = irecv[0];
639     info->nz_allocated = irecv[1];
640     info->nz_unneeded  = irecv[2];
641     info->memory       = irecv[3];
642     info->mallocs      = irecv[4];
643   } else if (flag == MAT_GLOBAL_SUM) {
644     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
645     info->nz_used      = irecv[0];
646     info->nz_allocated = irecv[1];
647     info->nz_unneeded  = irecv[2];
648     info->memory       = irecv[3];
649     info->mallocs      = irecv[4];
650   }
651   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
652   info->fill_ratio_needed = 0;
653   info->factor_mallocs    = 0;
654   PetscFunctionReturn(0);
655 }
656 
657 #undef __FUNCT__
658 #define __FUNCT__ "MatSetOption_MPIDense"
659 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op)
660 {
661   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
662   PetscErrorCode ierr;
663 
664   PetscFunctionBegin;
665   switch (op) {
666   case MAT_NO_NEW_NONZERO_LOCATIONS:
667   case MAT_YES_NEW_NONZERO_LOCATIONS:
668   case MAT_NEW_NONZERO_LOCATION_ERR:
669   case MAT_NEW_NONZERO_ALLOCATION_ERR:
670   case MAT_COLUMNS_SORTED:
671   case MAT_COLUMNS_UNSORTED:
672     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
673     break;
674   case MAT_ROW_ORIENTED:
675     a->roworiented = PETSC_TRUE;
676     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
677     break;
678   case MAT_ROWS_SORTED:
679   case MAT_ROWS_UNSORTED:
680   case MAT_YES_NEW_DIAGONALS:
681   case MAT_USE_HASH_TABLE:
682     PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
683     break;
684   case MAT_COLUMN_ORIENTED:
685     a->roworiented = PETSC_FALSE;
686     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
687     break;
688   case MAT_IGNORE_OFF_PROC_ENTRIES:
689     a->donotstash = PETSC_TRUE;
690     break;
691   case MAT_NO_NEW_DIAGONALS:
692     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
693   case MAT_SYMMETRIC:
694   case MAT_STRUCTURALLY_SYMMETRIC:
695   case MAT_NOT_SYMMETRIC:
696   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
697   case MAT_HERMITIAN:
698   case MAT_NOT_HERMITIAN:
699   case MAT_SYMMETRY_ETERNAL:
700   case MAT_NOT_SYMMETRY_ETERNAL:
701     break;
702   default:
703     SETERRQ(PETSC_ERR_SUP,"unknown option");
704   }
705   PetscFunctionReturn(0);
706 }
707 
708 #undef __FUNCT__
709 #define __FUNCT__ "MatGetRow_MPIDense"
710 PetscErrorCode MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v)
711 {
712   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
713   PetscErrorCode ierr;
714   int          lrow,rstart = mat->rstart,rend = mat->rend;
715 
716   PetscFunctionBegin;
717   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
718   lrow = row - rstart;
719   ierr = MatGetRow(mat->A,lrow,nz,(const int **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
720   PetscFunctionReturn(0);
721 }
722 
723 #undef __FUNCT__
724 #define __FUNCT__ "MatRestoreRow_MPIDense"
725 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
726 {
727   PetscErrorCode ierr;
728 
729   PetscFunctionBegin;
730   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
731   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
732   PetscFunctionReturn(0);
733 }
734 
735 #undef __FUNCT__
736 #define __FUNCT__ "MatDiagonalScale_MPIDense"
737 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
738 {
739   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
740   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
741   PetscScalar  *l,*r,x,*v;
742   PetscErrorCode ierr;
743   int          i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;
744 
745   PetscFunctionBegin;
746   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
747   if (ll) {
748     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
749     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
750     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
751     for (i=0; i<m; i++) {
752       x = l[i];
753       v = mat->v + i;
754       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
755     }
756     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
757     PetscLogFlops(n*m);
758   }
759   if (rr) {
760     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
761     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
762     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
763     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
764     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
765     for (i=0; i<n; i++) {
766       x = r[i];
767       v = mat->v + i*m;
768       for (j=0; j<m; j++) { (*v++) *= x;}
769     }
770     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
771     PetscLogFlops(n*m);
772   }
773   PetscFunctionReturn(0);
774 }
775 
776 #undef __FUNCT__
777 #define __FUNCT__ "MatNorm_MPIDense"
778 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
779 {
780   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
781   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
782   PetscErrorCode ierr;
783   int          i,j;
784   PetscReal    sum = 0.0;
785   PetscScalar  *v = mat->v;
786 
787   PetscFunctionBegin;
788   if (mdn->size == 1) {
789     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
790   } else {
791     if (type == NORM_FROBENIUS) {
792       for (i=0; i<mdn->A->n*mdn->A->m; i++) {
793 #if defined(PETSC_USE_COMPLEX)
794         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
795 #else
796         sum += (*v)*(*v); v++;
797 #endif
798       }
799       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
800       *nrm = sqrt(*nrm);
801       PetscLogFlops(2*mdn->A->n*mdn->A->m);
802     } else if (type == NORM_1) {
803       PetscReal *tmp,*tmp2;
804       ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
805       tmp2 = tmp + A->N;
806       ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
807       *nrm = 0.0;
808       v = mat->v;
809       for (j=0; j<mdn->A->n; j++) {
810         for (i=0; i<mdn->A->m; i++) {
811           tmp[j] += PetscAbsScalar(*v);  v++;
812         }
813       }
814       ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
815       for (j=0; j<A->N; j++) {
816         if (tmp2[j] > *nrm) *nrm = tmp2[j];
817       }
818       ierr = PetscFree(tmp);CHKERRQ(ierr);
819       PetscLogFlops(A->n*A->m);
820     } else if (type == NORM_INFINITY) { /* max row norm */
821       PetscReal ntemp;
822       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
823       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
824     } else {
825       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
826     }
827   }
828   PetscFunctionReturn(0);
829 }
830 
831 #undef __FUNCT__
832 #define __FUNCT__ "MatTranspose_MPIDense"
833 PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout)
834 {
835   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
836   Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
837   Mat          B;
838   int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
839   PetscErrorCode ierr;
840   int          j,i;
841   PetscScalar  *v;
842 
843   PetscFunctionBegin;
844   if (!matout && M != N) {
845     SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
846   }
847   ierr = MatCreate(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,&B);CHKERRQ(ierr);
848   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
849   ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
850 
851   m = a->A->m; n = a->A->n; v = Aloc->v;
852   ierr = PetscMalloc(m*sizeof(int),&rwork);CHKERRQ(ierr);
853   for (j=0; j<n; j++) {
854     for (i=0; i<m; i++) rwork[i] = rstart + i;
855     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
856     v   += m;
857   }
858   ierr = PetscFree(rwork);CHKERRQ(ierr);
859   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
860   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
861   if (matout) {
862     *matout = B;
863   } else {
864     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
865   }
866   PetscFunctionReturn(0);
867 }
868 
869 #include "petscblaslapack.h"
870 #undef __FUNCT__
871 #define __FUNCT__ "MatScale_MPIDense"
872 PetscErrorCode MatScale_MPIDense(const PetscScalar *alpha,Mat inA)
873 {
874   Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
875   Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
876   PetscBLASInt one = 1,nz = (PetscBLASInt)inA->m*inA->N;
877 
878   PetscFunctionBegin;
879   BLscal_(&nz,(PetscScalar*)alpha,a->v,&one);
880   PetscLogFlops(nz);
881   PetscFunctionReturn(0);
882 }
883 
884 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
885 
886 #undef __FUNCT__
887 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
888 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
889 {
890   PetscErrorCode ierr;
891 
892   PetscFunctionBegin;
893   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
894   PetscFunctionReturn(0);
895 }
896 
897 /* -------------------------------------------------------------------*/
898 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
899        MatGetRow_MPIDense,
900        MatRestoreRow_MPIDense,
901        MatMult_MPIDense,
902 /* 4*/ MatMultAdd_MPIDense,
903        MatMultTranspose_MPIDense,
904        MatMultTransposeAdd_MPIDense,
905        0,
906        0,
907        0,
908 /*10*/ 0,
909        0,
910        0,
911        0,
912        MatTranspose_MPIDense,
913 /*15*/ MatGetInfo_MPIDense,
914        0,
915        MatGetDiagonal_MPIDense,
916        MatDiagonalScale_MPIDense,
917        MatNorm_MPIDense,
918 /*20*/ MatAssemblyBegin_MPIDense,
919        MatAssemblyEnd_MPIDense,
920        0,
921        MatSetOption_MPIDense,
922        MatZeroEntries_MPIDense,
923 /*25*/ MatZeroRows_MPIDense,
924        0,
925        0,
926        0,
927        0,
928 /*30*/ MatSetUpPreallocation_MPIDense,
929        0,
930        0,
931        MatGetArray_MPIDense,
932        MatRestoreArray_MPIDense,
933 /*35*/ MatDuplicate_MPIDense,
934        0,
935        0,
936        0,
937        0,
938 /*40*/ 0,
939        MatGetSubMatrices_MPIDense,
940        0,
941        MatGetValues_MPIDense,
942        0,
943 /*45*/ 0,
944        MatScale_MPIDense,
945        0,
946        0,
947        0,
948 /*50*/ 0,
949        0,
950        0,
951        0,
952        0,
953 /*55*/ 0,
954        0,
955        0,
956        0,
957        0,
958 /*60*/ MatGetSubMatrix_MPIDense,
959        MatDestroy_MPIDense,
960        MatView_MPIDense,
961        MatGetPetscMaps_Petsc,
962        0,
963 /*65*/ 0,
964        0,
965        0,
966        0,
967        0,
968 /*70*/ 0,
969        0,
970        0,
971        0,
972        0,
973 /*75*/ 0,
974        0,
975        0,
976        0,
977        0,
978 /*80*/ 0,
979        0,
980        0,
981        0,
982 /*84*/ MatLoad_MPIDense,
983        0,
984        0,
985        0,
986        0,
987        0,
988 /*90*/ 0,
989        0,
990        0,
991        0,
992        0,
993 /*95*/ 0,
994        0,
995        0,
996        0};
997 
998 EXTERN_C_BEGIN
999 #undef __FUNCT__
1000 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1001 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1002 {
1003   Mat_MPIDense *a;
1004   PetscErrorCode ierr;
1005 
1006   PetscFunctionBegin;
1007   mat->preallocated = PETSC_TRUE;
1008   /* Note: for now, when data is specified, we assume that the user has correctly allocated
1009      the local dense storage space (mat->m rows by mat->N columns, stored by columns).  We should add error checking. */
1010 
1011   a    = (Mat_MPIDense*)mat->data;
1012   ierr = MatCreate(PETSC_COMM_SELF,mat->m,mat->N,mat->m,mat->N,&a->A);CHKERRQ(ierr);
1013   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1014   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1015   PetscLogObjectParent(mat,a->A);
1016   PetscFunctionReturn(0);
1017 }
1018 EXTERN_C_END
1019 
1020 /*MC
1021    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1022 
1023    Options Database Keys:
1024 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1025 
1026   Level: beginner
1027 
1028 .seealso: MatCreateMPIDense
1029 M*/
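
/*
   Minimal usage sketch (illustrative only; M and N stand for the global dimensions and are not
   defined in this file). It follows the creation pattern used elsewhere in this file, e.g. in
   MatLoad_MPIDense():

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);        or MatSetFromOptions(A) with -mat_type mpidense
      ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
      ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
*/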
1030 
1031 EXTERN_C_BEGIN
1032 #undef __FUNCT__
1033 #define __FUNCT__ "MatCreate_MPIDense"
1034 PetscErrorCode MatCreate_MPIDense(Mat mat)
1035 {
1036   Mat_MPIDense *a;
1037   PetscErrorCode ierr;
1038   int          i;
1039 
1040   PetscFunctionBegin;
1041   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1042   mat->data         = (void*)a;
1043   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1044   mat->factor       = 0;
1045   mat->mapping      = 0;
1046 
1047   a->factor       = 0;
1048   mat->insertmode = NOT_SET_VALUES;
1049   ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
1050   ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);
1051 
1052   ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
1053   ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
1054   a->nvec = mat->n;
1055 
1056   /* the information in the maps duplicates the information computed below, eventually
1057      we should remove the duplicate information that is not contained in the maps */
1058   ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
1059   ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);
1060 
1061   /* build local table of row and column ownerships */
1062   ierr       = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
1063   a->cowners = a->rowners + a->size + 1;
1064   PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1065   ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
1066   a->rowners[0] = 0;
1067   for (i=2; i<=a->size; i++) {
1068     a->rowners[i] += a->rowners[i-1];
1069   }
1070   a->rstart = a->rowners[a->rank];
1071   a->rend   = a->rowners[a->rank+1];
1072   ierr      = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
1073   a->cowners[0] = 0;
1074   for (i=2; i<=a->size; i++) {
1075     a->cowners[i] += a->cowners[i-1];
1076   }
1077 
1078   /* build cache for off array entries formed */
1079   a->donotstash = PETSC_FALSE;
1080   ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);
1081 
1082   /* stuff used for matrix vector multiply */
1083   a->lvec        = 0;
1084   a->Mvctx       = 0;
1085   a->roworiented = PETSC_TRUE;
1086 
1087   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1088                                      "MatGetDiagonalBlock_MPIDense",
1089                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1090   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1091                                      "MatMPIDenseSetPreallocation_MPIDense",
1092                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1093   PetscFunctionReturn(0);
1094 }
1095 EXTERN_C_END
1096 
1097 /*MC
1098    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1099 
1100    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1101    and MATMPIDENSE otherwise.
1102 
1103    Options Database Keys:
1104 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1105 
1106   Level: beginner
1107 
1108 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1109 M*/
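
/*
   Illustrative note (not part of the original source): after
      ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
   the type resolves to MATSEQDENSE on a one-process communicator and to MATMPIDENSE otherwise,
   as implemented by MatCreate_Dense() below.
*/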
1110 
1111 EXTERN_C_BEGIN
1112 #undef __FUNCT__
1113 #define __FUNCT__ "MatCreate_Dense"
1114 PetscErrorCode MatCreate_Dense(Mat A)
1115 {
1116   PetscErrorCode ierr;
1117   int size;
1118 
1119   PetscFunctionBegin;
1120   ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr);
1121   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1122   if (size == 1) {
1123     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1124   } else {
1125     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1126   }
1127   PetscFunctionReturn(0);
1128 }
1129 EXTERN_C_END
1130 
1131 #undef __FUNCT__
1132 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1133 /*@C
1134    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1135 
1136    Not collective
1137 
1138    Input Parameters:
1139 +  A - the matrix
1140 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1141    to control all matrix memory allocation.
1142 
1143    Notes:
1144    The dense format is fully compatible with standard Fortran 77
1145    storage by columns.
1146 
1147    The data input variable is intended primarily for Fortran programmers
1148    who wish to allocate their own matrix memory space.  Most users should
1149    set data=PETSC_NULL.
1150 
1151    Level: intermediate
1152 
1153 .keywords: matrix, dense, parallel
1154 
1155 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1156 @*/
1157 PetscErrorCode MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1158 {
1159   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1160 
1161   PetscFunctionBegin;
1162   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1163   if (f) {
1164     ierr = (*f)(mat,data);CHKERRQ(ierr);
1165   }
1166   PetscFunctionReturn(0);
1167 }
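
/*
   Sketch of the user-supplied-storage variant described in the Notes above (illustrative only;
   comm, m, M and N are placeholders). The supplied array must hold the local block of m local
   rows by N global columns, stored by columns (Fortran order), matching the local SeqDense
   matrix created in MatMPIDenseSetPreallocation_MPIDense():

      Mat         A;
      PetscScalar *mydata;
      ierr = PetscMalloc(m*N*sizeof(PetscScalar),&mydata);CHKERRQ(ierr);
      ierr = MatCreate(comm,m,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,mydata);CHKERRQ(ierr);
*/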
1168 
1169 #undef __FUNCT__
1170 #define __FUNCT__ "MatCreateMPIDense"
1171 /*@C
1172    MatCreateMPIDense - Creates a parallel matrix in dense format.
1173 
1174    Collective on MPI_Comm
1175 
1176    Input Parameters:
1177 +  comm - MPI communicator
1178 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1179 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1180 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1181 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1182 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1183    to control all matrix memory allocation.
1184 
1185    Output Parameter:
1186 .  A - the matrix
1187 
1188    Notes:
1189    The dense format is fully compatible with standard Fortran 77
1190    storage by columns.
1191 
1192    The data input variable is intended primarily for Fortran programmers
1193    who wish to allocate their own matrix memory space.  Most users should
1194    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1195 
1196    The user MUST specify either the local or global matrix dimensions
1197    (possibly both).
1198 
1199    Level: intermediate
1200 
1201 .keywords: matrix, dense, parallel
1202 
1203 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1204 @*/
1205 PetscErrorCode MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A)
1206 {
1207   PetscErrorCode ierr;
1208   int size;
1209 
1210   PetscFunctionBegin;
1211   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1212   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1213   if (size > 1) {
1214     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1215     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1216   } else {
1217     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1218     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1219   }
1220   PetscFunctionReturn(0);
1221 }
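
/*
   Minimal usage sketch for MatCreateMPIDense() above (illustrative only; M and N are placeholders
   for the global dimensions):

      Mat A;
      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
*/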
1222 
1223 #undef __FUNCT__
1224 #define __FUNCT__ "MatDuplicate_MPIDense"
1225 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1226 {
1227   Mat          mat;
1228   Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1229   PetscErrorCode ierr;
1230 
1231   PetscFunctionBegin;
1232   *newmat       = 0;
1233   ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
1234   ierr = MatSetType(mat,A->type_name);CHKERRQ(ierr);
1235   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1236   mat->data         = (void*)a;
1237   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1238   mat->factor       = A->factor;
1239   mat->assembled    = PETSC_TRUE;
1240   mat->preallocated = PETSC_TRUE;
1241 
1242   a->rstart       = oldmat->rstart;
1243   a->rend         = oldmat->rend;
1244   a->size         = oldmat->size;
1245   a->rank         = oldmat->rank;
1246   mat->insertmode = NOT_SET_VALUES;
1247   a->nvec         = oldmat->nvec;
1248   a->donotstash   = oldmat->donotstash;
1249   ierr            = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);CHKERRQ(ierr);
1250   PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1251   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
1252   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1253 
1254   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1255   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1256   PetscLogObjectParent(mat,a->A);
1257   *newmat = mat;
1258   PetscFunctionReturn(0);
1259 }
1260 
1261 #include "petscsys.h"
1262 
1263 #undef __FUNCT__
1264 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1265 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,const MatType type,Mat *newmat)
1266 {
1267   PetscErrorCode ierr;
1268   PetscMPIInt    rank,size;
1269   int            *rowners,i,m,nz,j;
1270   PetscScalar    *array,*vals,*vals_ptr;
1271   MPI_Status     status;
1272 
1273   PetscFunctionBegin;
1274   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1275   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1276 
1277   /* determine ownership of all rows */
1278   m          = M/size + ((M % size) > rank);
1279   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1280   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1281   rowners[0] = 0;
1282   for (i=2; i<=size; i++) {
1283     rowners[i] += rowners[i-1];
1284   }
1285 
1286   ierr = MatCreate(comm,m,PETSC_DECIDE,M,N,newmat);CHKERRQ(ierr);
1287   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1288   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1289   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1290 
1291   if (!rank) {
1292     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1293 
1294     /* read in my part of the matrix numerical values  */
1295     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1296 
1297     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1298     vals_ptr = vals;
1299     for (i=0; i<m; i++) {
1300       for (j=0; j<N; j++) {
1301         array[i + j*m] = *vals_ptr++;
1302       }
1303     }
1304 
1305     /* read in the other processors' parts and ship them out */
1306     for (i=1; i<size; i++) {
1307       nz   = (rowners[i+1] - rowners[i])*N;
1308       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1309       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1310     }
1311   } else {
1312     /* receive numeric values */
1313     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1314 
1315     /* receive message of values*/
1316     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1317 
1318     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1319     vals_ptr = vals;
1320     for (i=0; i<m; i++) {
1321       for (j=0; j<N; j++) {
1322         array[i + j*m] = *vals_ptr++;
1323       }
1324     }
1325   }
  ierr = MatRestoreArray(*newmat,&array);CHKERRQ(ierr);
1326   ierr = PetscFree(rowners);CHKERRQ(ierr);
1327   ierr = PetscFree(vals);CHKERRQ(ierr);
1328   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1329   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1330   PetscFunctionReturn(0);
1331 }
1332 
1333 #undef __FUNCT__
1334 #define __FUNCT__ "MatLoad_MPIDense"
1335 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
1336 {
1337   Mat            A;
1338   PetscScalar    *vals,*svals;
1339   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1340   MPI_Status     status;
1341   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1342   int            header[4],*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1343   int            *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1344   int            i,nz,j,rstart,rend,fd;
1345   PetscErrorCode ierr;
1346 
1347   PetscFunctionBegin;
1348   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1349   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1350   if (!rank) {
1351     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1352     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1353     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1354   }
1355 
1356   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1357   M = header[1]; N = header[2]; nz = header[3];
1358 
1359   /*
1360        Handle case where matrix is stored on disk as a dense matrix
1361   */
1362   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1363     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1364     PetscFunctionReturn(0);
1365   }
1366 
1367   /* determine ownership of all rows */
1368   m          = M/size + ((M % size) > rank);
1369   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1370   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1371   rowners[0] = 0;
1372   for (i=2; i<=size; i++) {
1373     rowners[i] += rowners[i-1];
1374   }
1375   rstart = rowners[rank];
1376   rend   = rowners[rank+1];
1377 
1378   /* distribute row lengths to all processors */
1379   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1380   offlens = ourlens + (rend-rstart);
1381   if (!rank) {
1382     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1383     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1384     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1385     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1386     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1387     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1388   } else {
1389     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1390   }
1391 
1392   if (!rank) {
1393     /* calculate the number of nonzeros on each processor */
1394     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1395     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1396     for (i=0; i<size; i++) {
1397       for (j=rowners[i]; j< rowners[i+1]; j++) {
1398         procsnz[i] += rowlengths[j];
1399       }
1400     }
1401     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1402 
1403     /* determine max buffer needed and allocate it */
1404     maxnz = 0;
1405     for (i=0; i<size; i++) {
1406       maxnz = PetscMax(maxnz,procsnz[i]);
1407     }
1408     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1409 
1410     /* read in my part of the matrix column indices  */
1411     nz = procsnz[0];
1412     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1413     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1414 
1415     /* read in everyone else's part and ship it off */
1416     for (i=1; i<size; i++) {
1417       nz   = procsnz[i];
1418       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1419       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1420     }
1421     ierr = PetscFree(cols);CHKERRQ(ierr);
1422   } else {
1423     /* determine buffer space needed for message */
1424     nz = 0;
1425     for (i=0; i<m; i++) {
1426       nz += ourlens[i];
1427     }
1428     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1429 
1430     /* receive message of column indices*/
1431     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1432     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1433     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1434   }
1435 
1436   /* loop over local rows, determining number of off diagonal entries */
1437   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1438   jj = 0;
1439   for (i=0; i<m; i++) {
1440     for (j=0; j<ourlens[i]; j++) {
1441       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1442       jj++;
1443     }
1444   }
1445 
1446   /* create our matrix */
1447   for (i=0; i<m; i++) {
1448     ourlens[i] -= offlens[i];
1449   }
1450   ierr = MatCreate(comm,m,PETSC_DECIDE,M,N,newmat);CHKERRQ(ierr);
1451   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1452   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1453   A = *newmat;
1454   for (i=0; i<m; i++) {
1455     ourlens[i] += offlens[i];
1456   }
1457 
1458   if (!rank) {
1459     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1460 
1461     /* read in my part of the matrix numerical values  */
1462     nz = procsnz[0];
1463     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1464 
1465     /* insert into matrix */
1466     jj      = rstart;
1467     smycols = mycols;
1468     svals   = vals;
1469     for (i=0; i<m; i++) {
1470       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1471       smycols += ourlens[i];
1472       svals   += ourlens[i];
1473       jj++;
1474     }
1475 
1476     /* read in other processors and ship out */
1477     for (i=1; i<size; i++) {
1478       nz   = procsnz[i];
1479       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1480       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1481     }
1482     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1483   } else {
1484     /* receive numeric values */
1485     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1486 
1487     /* receive message of values*/
1488     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1489     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1490     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1491 
1492     /* insert into matrix */
1493     jj      = rstart;
1494     smycols = mycols;
1495     svals   = vals;
1496     for (i=0; i<m; i++) {
1497       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1498       smycols += ourlens[i];
1499       svals   += ourlens[i];
1500       jj++;
1501     }
1502   }
1503   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1504   ierr = PetscFree(vals);CHKERRQ(ierr);
1505   ierr = PetscFree(mycols);CHKERRQ(ierr);
1506   ierr = PetscFree(rowners);CHKERRQ(ierr);
1507 
1508   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1509   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1510   PetscFunctionReturn(0);
1511 }
1512 