/*
   Basic functions for basic parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  int          m = A->m,rstart = mdn->rstart,ierr;
  PetscScalar  *array;
  MPI_Comm     comm;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreate(comm,m,m,m,m,B);CHKERRQ(ierr);
  ierr = MatSetType(*B,mdn->A->type_name);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END
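
/*
   A minimal usage sketch (illustrative, not part of the original source): this
   routine is dispatched through the "MatGetDiagonalBlock_C" function composed
   onto the matrix in MatCreate_MPIDense() below, so a caller would typically do

     int        (*f)(Mat,PetscTruth*,MatReuse,Mat*);
     Mat        diag;
     PetscTruth iscopy;
     ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr);
     if (f) {ierr = (*f)(A,&iscopy,MAT_INITIAL_MATRIX,&diag);CHKERRQ(ierr);}

   assuming A is an assembled, square MATMPIDENSE matrix.
*/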

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
  PetscTruth   roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
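
/*
   A minimal usage sketch (illustrative, not part of the original source): entries
   are addressed by global indices; rows owned by another process are cached in
   mat->stash here and communicated during assembly, unless the
   MAT_IGNORE_OFF_PROC_ENTRIES option was set.

     PetscScalar one = 1.0;
     int         row = 0,col = 0;   (global indices)
     ierr = MatSetValues(A,1,&row,1,&col,&one,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/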

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
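
/*
   A minimal sketch of direct array access (illustrative): the pointer returned is
   the local m x N block in column-major (Fortran) order, so local entry (i,j)
   lives at array[i + j*m], where m is the local row count.

     PetscScalar *array;
     ierr = MatGetArray(A,&array);CHKERRQ(ierr);
     array[0] = 5.0;   (first local row, first column)
     ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
*/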

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
  int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  PetscScalar  *av,*bv,*v = lmat->v;
  Mat          newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution is currently supported! We should really check each
     index set to confirm that it is OK; currently only submatrices with the same
     row partitioning as the original matrix are supported. */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(A->comm,nrows,cs,PETSC_DECIDE,ncols,&newmat);CHKERRQ(ierr);
    ierr = MatSetType(newmat,A->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}
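
/*
   A minimal usage sketch (illustrative), honoring the restriction noted above that
   the rows requested must follow the original row partitioning; nlocal, rstart,
   and ncols are assumed to be supplied by the caller.

     IS  isrow,iscol;
     Mat sub;
     ierr = ISCreateStride(PETSC_COMM_SELF,nlocal,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_SELF,ncols,0,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
*/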

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  PetscScalar  *val;
  InsertMode   addv=mat->insertmode;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   the local matrix mdn->A directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int            *nprocs,j,idx,nsends;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
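
/*
   A minimal usage sketch (illustrative): zero the locally owned rows listed in an
   index set, optionally placing a value on the diagonal (subject to the
   restriction described in the comment above MatZeroRows_MPIDense); nlocal and
   idx are assumed to be supplied by the caller.

     IS          rows;
     PetscScalar diag = 1.0;
     ierr = ISCreateGeneral(PETSC_COMM_SELF,nlocal,idx,&rows);CHKERRQ(ierr);
     ierr = MatZeroRows(A,rows,&diag);CHKERRQ(ierr);
     ierr = ISDestroy(rows);CHKERRQ(ierr);
*/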

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;
  PetscScalar  zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
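
/*
   A minimal sketch of the multiply kernels above (illustrative): x must match the
   column layout of A and y the row layout; the scatter into mdn->lvec gathers the
   input vector before the local dense product is applied.

     Vec x,y;
     ierr = VecCreateMPI(A->comm,A->n,A->N,&x);CHKERRQ(ierr);
     ierr = VecCreateMPI(A->comm,A->m,A->M,&y);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);            y = A*x
     ierr = MatMultTranspose(A,y,x);CHKERRQ(ierr);   x = A^T*y
*/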

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
  int          ierr,len,i,n,m = A->m,radd;
  PetscScalar  *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->m,a->A->n);
  radd = a->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
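
/*
   A minimal usage sketch (illustrative): the diagonal is returned in a parallel
   vector whose global size equals the number of rows, as checked above.

     Vec d;
     ierr = VecCreateMPI(A->comm,A->m,A->M,&d);CHKERRQ(ierr);
     ierr = MatGetDiagonal(A,d);CHKERRQ(ierr);
*/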

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;

#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
  if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  int               ierr,size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
                   (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat               A;
    int               M = mat->M,N = mat->N,m,row,i,nz;
    const int         *cols;
    const PetscScalar *vals;

    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
    PetscLogObjectParent(mat,A);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = mdn->A->m;
    for (i=0; i<m; i++) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
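
/*
   A minimal usage sketch (illustrative): viewing goes through the generic MatView()
   interface, which dispatches to MatView_MPIDense for the viewer types handled above.

     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/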

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  Mat          mdn = mat->A;
  int          ierr;
  PetscReal    isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->M;
  info->columns_global = (double)A->N;
  info->rows_local     = (double)A->m;
  info->columns_local  = (double)A->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_COLUMNS_SORTED:
  case MAT_COLUMNS_UNSORTED:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
    PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_NOT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}
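
/*
   A minimal usage sketch (illustrative): switching to column-oriented insertion,
   which changes how the v[] array is interpreted by MatSetValues_MPIDense above;
   m, n, rows, cols, and vals are assumed to be supplied by the caller.

     ierr = MatSetOption(A,MAT_COLUMN_ORIENTED);CHKERRQ(ierr);
     ierr = MatSetValues(A,m,rows,n,cols,vals,INSERT_VALUES);CHKERRQ(ierr);

   With this option vals is read down each column, i.e. vals[i + j*m] is destined
   for entry (rows[i],cols[j]).
*/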

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const int **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  int ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar  *l,*r,x,*v;
  int          ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    PetscLogFlops(n*m);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    PetscLogFlops(n*m);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  int          ierr,i,j;
  PetscReal    sum = 0.0;
  PetscScalar  *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->n*mdn->A->m; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      PetscLogFlops(2*mdn->A->n*mdn->A->m);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->N;
      ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v = mat->v;
      for (j=0; j<mdn->A->n; j++) {
        for (i=0; i<mdn->A->m; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      for (j=0; j<A->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      PetscLogFlops(A->n*A->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
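
/*
   A minimal usage sketch (illustrative) of the norms supported above:

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_1,&nrm);CHKERRQ(ierr);          max column sum
     ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);   max row sum
*/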

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
  Mat          B;
  int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
  int          j,i,ierr;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_SUP,"Supports in-place transpose only for square matrices");
  }
  ierr = MatCreate(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);

  m = a->A->m; n = a->A->n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(int),&rwork);CHKERRQ(ierr); /* must hold the m global row indices filled below */
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIDense"
int MatScale_MPIDense(const PetscScalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
  int          one = 1,nz;

  PetscFunctionBegin;
  nz = inA->m*inA->N;
  BLscal_(&nz,(PetscScalar*)alpha,a->v,&one);
  PetscLogFlops(nz);
  PetscFunctionReturn(0);
}
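
/*
   A minimal usage sketch (illustrative): in this generation of PETSc the scalar
   is passed to MatScale() by pointer, matching the implementation above.

     PetscScalar alpha = 2.0;
     ierr = MatScale(&alpha,A);CHKERRQ(ierr);   A = 2*A
*/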

static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
int MatSetUpPreallocation_MPIDense(Mat A)
{
  int        ierr;

  PetscFunctionBegin;
  ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
/* 4*/ MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
/*15*/ MatGetInfo_MPIDense,
       0,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
/*20*/ MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
/*25*/ MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
/*35*/ MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
/*45*/ 0,
       MatScale_MPIDense,
       0,
       0,
       0,
/*50*/ MatGetBlockSize_MPIDense,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       0,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       MatGetPetscMaps_Petsc,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*85*/ MatLoad_MPIDense};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
int MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense *a;
  int          ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note:  for now, when data is provided, we assume the user has correctly
   allocated the local dense storage space.  We should add error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = MatCreate(PETSC_COMM_SELF,mat->m,mat->N,mat->m,mat->N,&a->A);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
. -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIDense
M*/
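
/*
   A minimal usage sketch (illustrative): selecting this type at runtime; M and N
   are assumed to be global sizes chosen by the caller.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);

   Running the program with -mat_type mpidense then picks this implementation.
*/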

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
int MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;
  int          ierr,i;

  PetscFunctionBegin;
  ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data         = (void*)a;
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = 0;
  mat->mapping      = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
  a->nvec = mat->n;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr       = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
  a->cowners = a->rowners + a->size + 1;
  PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr      = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->cowners[i] += a->cowners[i-1];
  }

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIDense",
                                     MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                     "MatMPIDenseSetPreallocation_MPIDense",
                                     MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
int MatCreate_Dense(Mat A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr);
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  mat - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix,dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  int ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
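
/*
   A minimal usage sketch (illustrative): supplying a user-allocated local array,
   which must hold at least m*N PetscScalars in column-major order; m, n, M, N
   are assumed to be consistent local/global sizes chosen by the caller.

     PetscScalar *data;
     Mat         A;
     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&data);CHKERRQ(ierr);
     ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,data);CHKERRQ(ierr);
*/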

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix,dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
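
/*
   A minimal usage sketch (illustrative); M and N are assumed to be global sizes
   chosen by the caller.

     Mat A;
     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
     ... set values and assemble ...
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/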

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,A->type_name);CHKERRQ(ierr);
  ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data         = (void*)a;
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;
  ierr            = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,const MatType type,Mat *newmat)
{
  int          *rowners,i,size,rank,m,ierr,nz,j;
  PetscScalar  *array,*vals,*vals_ptr;
  MPI_Status   status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreate(comm,m,PETSC_DECIDE,M,N,newmat);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
int MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
{
  Mat          A;
  PetscScalar  *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;
  int          i,nz,ierr,j,rstart,rend,fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,m,PETSC_DECIDE,M,N,newmat);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
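
/*
   A minimal usage sketch (illustrative): loading a parallel dense matrix from a
   PETSc binary file; the file name is arbitrary, and PETSC_FILE_RDONLY is assumed
   to be the read-only file type of this PETSc generation.

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_FILE_RDONLY,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIDENSE,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/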
1491