/*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
EXTERN int DisAssemble_MPIAIJ(Mat);
EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatPrintHelp_SeqAIJ(Mat);

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable,
  at a slightly higher hash-table cost; without it, it is not scalable
  (each processor holds an order-N integer array) but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
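
/*
   Example (hypothetical data): if this processor references the
   off-diagonal global columns garray[] = {3,17,42}, then after
   CreateColmap_MPIAIJ_Private() a lookup of global column 17 yields 2
   (entries are stored shifted up by one so that 0 can mean "not found");
   callers subtract one to recover the local index 1.
*/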

#define CHUNKSIZE   15
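/*
   MatSetValues_SeqAIJ_A_Private(row,col,value,addv) inserts or adds a
   single value into the diagonal (A) block: a short binary search narrows
   the range, a linear scan locates the column, and if the row is already
   full the matrix storage is reallocated with CHUNKSIZE extra slots for
   that row; later entries in the row are then shifted up to make room.
*/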
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row];  \
    col1 = col - shift; \
     \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > col1) break; \
        if (rp[_i] == col1) { \
          if (addv == ADD_VALUES) ap[_i] += value;   \
          else                  ap[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (nonew == 1) goto a_noinsert; \
      else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into the matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in the row, therefore enlarge */ \
        int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
        PetscScalar *new_a; \
 \
        if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
        ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (int*)(new_a + new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
        for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
        ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                                                           len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                                                           len*sizeof(PetscScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
 \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
           ierr = PetscFree(a->i);CHKERRQ(ierr); \
           ierr = PetscFree(a->j);CHKERRQ(ierr); \
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = PETSC_TRUE; \
 \
        rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
        rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
        PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
        a->maxnz += CHUNKSIZE; \
        a->reallocs++; \
      } \
      N = nrow++ - 1; a->nz++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ap[ii+1] = ap[ii]; \
      } \
      rp[_i] = col1;  \
      ap[_i] = value;  \
      a_noinsert: ; \
      ailen[row] = nrow; \
}

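/*
   Identical to MatSetValues_SeqAIJ_A_Private() except that it operates on
   the off-diagonal (B) block of the parallel matrix.
*/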
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row];  \
    col1 = col - shift; \
     \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > col1) break; \
        if (rp[_i] == col1) { \
          if (addv == ADD_VALUES) ap[_i] += value;   \
          else                  ap[_i] = value; \
          goto b_noinsert; \
        } \
      }  \
      if (nonew == 1) goto b_noinsert; \
      else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into the matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in the row, therefore enlarge */ \
        int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
        PetscScalar *new_a; \
 \
        if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
        ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (int*)(new_a + new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
        for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
        ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                                                           len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                                                           len*sizeof(PetscScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
 \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = PETSC_TRUE; \
 \
        rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
        rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
        PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
        b->maxnz += CHUNKSIZE; \
        b->reallocs++; \
      } \
      N = nrow++ - 1; b->nz++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ap[ii+1] = ap[ii]; \
      } \
      rp[_i] = col1;  \
      ap[_i] = value;  \
      b_noinsert: ; \
      bilen[row] = nrow; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar  value;
  int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int          cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth   roworiented = aij->roworiented;

  /* Some variables required by the macros above */
  Mat          A = aij->A;
  Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
  int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar  *aa = a->a;
  PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat          B = aij->B;
  Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
  int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar  *ba = b->a;

  int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  int          nonew = a->nonew,shift=0;
  PetscScalar  *ap;

  PetscFunctionBegin;
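  /*
     Each (row,column) pair goes to one of three places: the diagonal
     block aij->A if both indices are owned locally, the off-diagonal
     block aij->B if only the row is local (translating the global column
     through colmap/garray), or the stash if the row belongs to another
     processor, to be communicated during assembly.
  */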
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int        cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  int         ierr,nstash,reallocs;
  InsertMode  addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted while others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  PetscScalar *val;
  InsertMode  addv = mat->insertmode;
#if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_MUMPS)
  PetscTruth  flag;
#endif

  PetscFunctionBegin;
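  /*
     Receive the values stashed for off-processor rows during
     MatSetValues() and insert them locally, one row at a time per call.
  */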
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

#if defined(PETSC_HAVE_SUPERLUDIST)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
#endif

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); }
#endif

#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_mumps",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseMUMPS_MPIAIJ(mat);CHKERRQ(ierr); }
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

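/*
   The rows to zero may be specified by any processor, so each processor
   first routes the row indices it holds to the owning processors (local
   counts, an exchange of message lengths via PetscMaxSum(), then
   point-to-point sends/receives) before zeroing its own local rows.
*/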
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int            *nprocs,j,idx,nsends,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count the number of contributions to each processor */
  ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry;
     that would allocate new memory, causing lots of mallocs and slowing
     things down.

       Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

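/*
   yy = A*xx is formed as yy = Ad*xx + Ao*xx_ghost, where Ad and Ao are the
   diagonal and off-diagonal blocks; the scatter that gathers the ghost
   values of xx is overlapped with the local (diagonal block) multiply.
*/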
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do the nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do the local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in yy until the next line, which holds for this
     implementation but may not always be true */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsSymmetric_MPIAIJ"
int MatIsSymmetric_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f)
{
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat        Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS         Me,Notme;
  int        M,N,first,last,*notme,i, ierr;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsSymmetric(Adia,Bdia,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(int),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsSymmetric(Aoff,Boff,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do the nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do the local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in zz until the next line, which holds for this
     implementation but may not always be true */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer);
extern int MatFactorInfo_Spooles(Mat,PetscViewer);
extern int MatFactorInfo_MUMPS(Mat,PetscViewer);

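/*
   Writes the matrix in PETSc binary format: a header (cookie, M, N, total
   nonzero count) followed by all row lengths, then all global column
   indices, then all values. Each of the three arrays is streamed to
   processor 0, which does the actual writing.
*/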
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
  int               nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
  int               nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar       *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (rank == 0) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs enough space to hold the largest number of nonzeros on any processor */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  int               ierr,rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,isascii,flg,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
#if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE)
      ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE)
      ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS) && !defined(PETSC_USE_SINGLE)
      ierr = MatFactorInfo_MUMPS(mat,viewer);CHKERRQ(ierr);
#endif
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor */
    Mat         A;
    Mat_SeqAIJ *Aloc;
    int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (isascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

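/*
   Only processor-local relaxation is supported: each outer iteration
   scatters the current xx ghost values, forms bb1 = bb - B*x_ghost, and
   then runs the sequential SOR sweep on the diagonal block.
*/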
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
{
  Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
  int          ierr;
  Vec          bb1;
  PetscScalar  mone=-1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Mat        A = mat->A,B = mat->B;
  int        ierr;
  PetscReal  isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  int          *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int     max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
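      /* imark counts the B (off-diagonal) columns that lie to the left of the diagonal block */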
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  int          ierr,i,j,cstart = aij->cstart;
  PetscReal    sum = 0.0;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      int       *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

1381 #undef __FUNCT__
1382 #define __FUNCT__ "MatTranspose_MPIAIJ"
1383 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1384 {
1385   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1386   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1387   int          ierr;
1388   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1389   Mat          B;
1390   PetscScalar  *array;
1391 
1392   PetscFunctionBegin;
1393   if (!matout && M != N) {
1394     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1395   }
1396 
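  /* build B with the row and column sizes of A swapped; each local row of A is
     inserted as a column of B, with the diagonal part's column indices
     temporarily shifted to global numbering */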
1397   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1398 
1399   /* copy over the A part */
1400   Aloc = (Mat_SeqAIJ*)a->A->data;
1401   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1402   row = a->rstart;
1403   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1404   for (i=0; i<m; i++) {
1405     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1406     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1407   }
1408   aj = Aloc->j;
1409   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1410 
1411   /* copy over the B part */
1412   Aloc = (Mat_SeqAIJ*)a->B->data;
1413   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1414   row  = a->rstart;
1415   ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
1416   ct   = cols;
1417   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1418   for (i=0; i<m; i++) {
1419     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1420     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1421   }
1422   ierr = PetscFree(ct);CHKERRQ(ierr);
1423   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1424   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1425   if (matout) {
1426     *matout = B;
1427   } else {
1428     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1429   }
1430   PetscFunctionReturn(0);
1431 }
1432 
1433 #undef __FUNCT__
1434 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1435 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1436 {
1437   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1438   Mat        a = aij->A,b = aij->B;
1439   int        ierr,s1,s2,s3;
1440 
1441   PetscFunctionBegin;
1442   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1443   if (rr) {
1444     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1445     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1446     /* Overlap communication with computation. */
1447     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1448   }
1449   if (ll) {
1450     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1451     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1452     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1453   }
1454   /* scale the diagonal block */
1455   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1456 
1457   if (rr) {
1458     /* Do a scatter end and then right scale the off-diagonal block */
1459     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1460     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1461   }
1462 
1463   PetscFunctionReturn(0);
1464 }
1465 
1466 
1467 #undef __FUNCT__
1468 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1469 int MatPrintHelp_MPIAIJ(Mat A)
1470 {
1471   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1472   int        ierr;
1473 
1474   PetscFunctionBegin;
1475   if (!a->rank) {
1476     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1477   }
1478   PetscFunctionReturn(0);
1479 }
1480 
1481 #undef __FUNCT__
1482 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1483 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1484 {
1485   PetscFunctionBegin;
1486   *bs = 1;
1487   PetscFunctionReturn(0);
1488 }
1489 #undef __FUNCT__
1490 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1491 int MatSetUnfactored_MPIAIJ(Mat A)
1492 {
1493   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1494   int        ierr;
1495 
1496   PetscFunctionBegin;
1497   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1498   PetscFunctionReturn(0);
1499 }
1500 
1501 #undef __FUNCT__
1502 #define __FUNCT__ "MatEqual_MPIAIJ"
1503 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1504 {
1505   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1506   Mat        a,b,c,d;
1507   PetscTruth flg;
1508   int        ierr;
1509 
1510   PetscFunctionBegin;
1511   a = matA->A; b = matA->B;
1512   c = matB->A; d = matB->B;
1513 
1514   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1515   if (flg == PETSC_TRUE) {
1516     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1517   }
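  /* the matrices are equal only if both local parts match on every process,
     so combine the local results with a logical AND */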
1518   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1519   PetscFunctionReturn(0);
1520 }
1521 
1522 #undef __FUNCT__
1523 #define __FUNCT__ "MatCopy_MPIAIJ"
1524 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1525 {
1526   int        ierr;
1527   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1528   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1529 
1530   PetscFunctionBegin;
1531   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1532   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1533     /* because of the column compression in the off-processor part of the matrix a->B,
1534        the number of columns in a->B and b->B may be different, hence we cannot call
1535        MatCopy() directly on the two parts. If need be, we can provide a copy
1536        more efficient than MatCopy_Basic() by first uncompressing the a->B
1537        matrices and then copying the submatrices */
1538     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1539   } else {
1540     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1541     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1542   }
1543   PetscFunctionReturn(0);
1544 }
1545 
1546 #undef __FUNCT__
1547 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1548 int MatSetUpPreallocation_MPIAIJ(Mat A)
1549 {
1550   int        ierr;
1551 
1552   PetscFunctionBegin;
1553   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1554   PetscFunctionReturn(0);
1555 }
1556 
1557 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1558 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1559 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1560 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1561 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1562 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1563 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatFactorInfo*,Mat*);
1564 #endif
1565 
1566 #include "petscblaslapack.h"
1567 extern int MatAXPY_SeqAIJ(PetscScalar *,Mat,Mat,MatStructure);
1568 #undef __FUNCT__
1569 #define __FUNCT__ "MatAXPY_MPIAIJ"
1570 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1571 {
1572   int        ierr,one=1,i;
1573   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1574   Mat_SeqAIJ *x,*y;
1575 
1576   PetscFunctionBegin;
1577   if (str == SAME_NONZERO_PATTERN) {
1578     x = (Mat_SeqAIJ *)xx->A->data;
1579     y = (Mat_SeqAIJ *)yy->A->data;
1580     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1581     x = (Mat_SeqAIJ *)xx->B->data;
1582     y = (Mat_SeqAIJ *)yy->B->data;
1583     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1584   } else if (str == SUBSET_NONZERO_PATTERN) {
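    /* X's nonzero pattern is a subset of Y's: add the diagonal parts with the
       SeqAIJ kernel, then map each stored entry of X's off-diagonal part into
       Y's value array (the xtoy index map is built once and cached on Y) */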
1585     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1586 
1587     x = (Mat_SeqAIJ *)xx->B->data;
1588     y = (Mat_SeqAIJ *)yy->B->data;
1589     if (y->xtoy && y->XtoY != xx->B) {
1590       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1591       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1592     }
1593     if (!y->xtoy) { /* get xtoy */
1594       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1595       y->XtoY = xx->B;
1596     }
1597     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1598   } else {
1599     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1600   }
1601   PetscFunctionReturn(0);
1602 }
1603 
1604 /* -------------------------------------------------------------------*/
1605 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1606        MatGetRow_MPIAIJ,
1607        MatRestoreRow_MPIAIJ,
1608        MatMult_MPIAIJ,
1609        MatMultAdd_MPIAIJ,
1610        MatMultTranspose_MPIAIJ,
1611        MatMultTransposeAdd_MPIAIJ,
1612        0,
1613        0,
1614        0,
1615        0,
1616        0,
1617        0,
1618        MatRelax_MPIAIJ,
1619        MatTranspose_MPIAIJ,
1620        MatGetInfo_MPIAIJ,
1621        MatEqual_MPIAIJ,
1622        MatGetDiagonal_MPIAIJ,
1623        MatDiagonalScale_MPIAIJ,
1624        MatNorm_MPIAIJ,
1625        MatAssemblyBegin_MPIAIJ,
1626        MatAssemblyEnd_MPIAIJ,
1627        0,
1628        MatSetOption_MPIAIJ,
1629        MatZeroEntries_MPIAIJ,
1630        MatZeroRows_MPIAIJ,
1631 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1632        MatLUFactorSymbolic_MPIAIJ_TFS,
1633 #else
1634        0,
1635 #endif
1636        0,
1637        0,
1638        0,
1639        MatSetUpPreallocation_MPIAIJ,
1640        0,
1641        0,
1642        0,
1643        0,
1644        MatDuplicate_MPIAIJ,
1645        0,
1646        0,
1647        0,
1648        0,
1649        MatAXPY_MPIAIJ,
1650        MatGetSubMatrices_MPIAIJ,
1651        MatIncreaseOverlap_MPIAIJ,
1652        MatGetValues_MPIAIJ,
1653        MatCopy_MPIAIJ,
1654        MatPrintHelp_MPIAIJ,
1655        MatScale_MPIAIJ,
1656        0,
1657        0,
1658        0,
1659        MatGetBlockSize_MPIAIJ,
1660        0,
1661        0,
1662        0,
1663        0,
1664        MatFDColoringCreate_MPIAIJ,
1665        0,
1666        MatSetUnfactored_MPIAIJ,
1667        0,
1668        0,
1669        MatGetSubMatrix_MPIAIJ,
1670        MatDestroy_MPIAIJ,
1671        MatView_MPIAIJ,
1672        MatGetPetscMaps_Petsc,
1673        0,
1674        0,
1675        0,
1676        0,
1677        0,
1678        0,
1679        0,
1680        0,
1681        MatSetColoring_MPIAIJ,
1682        MatSetValuesAdic_MPIAIJ,
1683        MatSetValuesAdifor_MPIAIJ
1684 };
1685 
1686 /* ----------------------------------------------------------------------------------------*/
1687 
1688 EXTERN_C_BEGIN
1689 #undef __FUNCT__
1690 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1691 int MatStoreValues_MPIAIJ(Mat mat)
1692 {
1693   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1694   int        ierr;
1695 
1696   PetscFunctionBegin;
1697   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1698   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1699   PetscFunctionReturn(0);
1700 }
1701 EXTERN_C_END
1702 
1703 EXTERN_C_BEGIN
1704 #undef __FUNCT__
1705 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1706 int MatRetrieveValues_MPIAIJ(Mat mat)
1707 {
1708   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1709   int        ierr;
1710 
1711   PetscFunctionBegin;
1712   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1713   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1714   PetscFunctionReturn(0);
1715 }
1716 EXTERN_C_END
1717 
1718 #include "petscpc.h"
1719 EXTERN_C_BEGIN
1720 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1721 EXTERN int MatDiagonalScaleLocal_MPIAIJ(Mat,Vec);
1722 EXTERN_C_END
1723 
1724 EXTERN_C_BEGIN
1725 #undef __FUNCT__
1726 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1727 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1728 {
1729   Mat_MPIAIJ   *b;
1730   int          ierr,i;
1731 
1732   PetscFunctionBegin;
1733   B->preallocated = PETSC_TRUE;
1734   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1735   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1736   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1737   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1738   if (d_nnz) {
1739     for (i=0; i<B->m; i++) {
1740       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1741     }
1742   }
1743   if (o_nnz) {
1744     for (i=0; i<B->m; i++) {
1745       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1746     }
1747   }
1748   b = (Mat_MPIAIJ*)B->data;
1749 
1750   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1751   PetscLogObjectParent(B,b->A);
1752   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1753   PetscLogObjectParent(B,b->B);
1754 
1755   PetscFunctionReturn(0);
1756 }
1757 EXTERN_C_END
1758 
1759 EXTERN_C_BEGIN
1760 #undef __FUNCT__
1761 #define __FUNCT__ "MatCreate_MPIAIJ"
1762 int MatCreate_MPIAIJ(Mat B)
1763 {
1764   Mat_MPIAIJ *b;
1765   int        ierr,i,size;
1766 
1767   PetscFunctionBegin;
1768   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1769 
1770   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1771   B->data         = (void*)b;
1772   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1773   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1774   B->factor       = 0;
1775   B->assembled    = PETSC_FALSE;
1776   B->mapping      = 0;
1777 
1778   B->insertmode      = NOT_SET_VALUES;
1779   b->size            = size;
1780   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1781 
1782   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1783   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1784 
1785   /* the information in the maps duplicates the information computed below, eventually
1786      we should remove the duplicate information that is not contained in the maps */
1787   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1788   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1789 
1790   /* build local table of row and column ownerships */
1791   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1792   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1793   b->cowners = b->rowners + b->size + 2;
1794   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
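  /* turn the gathered local row counts into ownership offsets: after this
     prefix sum, rows rowners[rank] up to rowners[rank+1] belong to this process */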
1795   b->rowners[0] = 0;
1796   for (i=2; i<=b->size; i++) {
1797     b->rowners[i] += b->rowners[i-1];
1798   }
1799   b->rstart = b->rowners[b->rank];
1800   b->rend   = b->rowners[b->rank+1];
1801   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1802   b->cowners[0] = 0;
1803   for (i=2; i<=b->size; i++) {
1804     b->cowners[i] += b->cowners[i-1];
1805   }
1806   b->cstart = b->cowners[b->rank];
1807   b->cend   = b->cowners[b->rank+1];
1808 
1809   /* build cache for off array entries formed */
1810   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1811   b->donotstash  = PETSC_FALSE;
1812   b->colmap      = 0;
1813   b->garray      = 0;
1814   b->roworiented = PETSC_TRUE;
1815 
1816   /* stuff used for matrix vector multiply */
1817   b->lvec      = PETSC_NULL;
1818   b->Mvctx     = PETSC_NULL;
1819 
1820   /* stuff for MatGetRow() */
1821   b->rowindices   = 0;
1822   b->rowvalues    = 0;
1823   b->getrowactive = PETSC_FALSE;
1824 
1825   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1826                                      "MatStoreValues_MPIAIJ",
1827                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1828   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1829                                      "MatRetrieveValues_MPIAIJ",
1830                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1831   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1832 				     "MatGetDiagonalBlock_MPIAIJ",
1833                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1834   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsSymmetric_C",
1835 				     "MatIsSymmetric_MPIAIJ",
1836 				     MatIsSymmetric_MPIAIJ); CHKERRQ(ierr);
1837   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
1838 				     "MatMPIAIJSetPreallocation_MPIAIJ",
1839 				     MatMPIAIJSetPreallocation_MPIAIJ); CHKERRQ(ierr);
1840   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
1841 				     "MatDiagonalScaleLocal_MPIAIJ",
1842 				     MatDiagonalScaleLocal_MPIAIJ); CHKERRQ(ierr);
1843   PetscFunctionReturn(0);
1844 }
1845 EXTERN_C_END
1846 
1847 #undef __FUNCT__
1848 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1849 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1850 {
1851   Mat        mat;
1852   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1853   int        ierr;
1854 
1855   PetscFunctionBegin;
1856   *newmat       = 0;
1857   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1858   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1859   a    = (Mat_MPIAIJ*)mat->data;
1860   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1861   mat->factor       = matin->factor;
1862   mat->assembled    = PETSC_TRUE;
1863   mat->insertmode   = NOT_SET_VALUES;
1864   mat->preallocated = PETSC_TRUE;
1865 
1866   a->rstart       = oldmat->rstart;
1867   a->rend         = oldmat->rend;
1868   a->cstart       = oldmat->cstart;
1869   a->cend         = oldmat->cend;
1870   a->size         = oldmat->size;
1871   a->rank         = oldmat->rank;
1872   a->donotstash   = oldmat->donotstash;
1873   a->roworiented  = oldmat->roworiented;
1874   a->rowindices   = 0;
1875   a->rowvalues    = 0;
1876   a->getrowactive = PETSC_FALSE;
1877 
1878   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1879   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1880   if (oldmat->colmap) {
1881 #if defined (PETSC_USE_CTABLE)
1882     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1883 #else
1884     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1885     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1886     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1887 #endif
1888   } else a->colmap = 0;
1889   if (oldmat->garray) {
1890     int len;
1891     len  = oldmat->B->n;
1892     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1893     PetscLogObjectMemory(mat,len*sizeof(int));
1894     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1895   } else a->garray = 0;
1896 
1897   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1898   PetscLogObjectParent(mat,a->lvec);
1899   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1900   PetscLogObjectParent(mat,a->Mvctx);
1901   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1902   PetscLogObjectParent(mat,a->A);
1903   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1904   PetscLogObjectParent(mat,a->B);
1905   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1906   *newmat = mat;
1907   PetscFunctionReturn(0);
1908 }
1909 
1910 #include "petscsys.h"
1911 
1912 EXTERN_C_BEGIN
1913 #undef __FUNCT__
1914 #define __FUNCT__ "MatLoad_MPIAIJ"
1915 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1916 {
1917   Mat          A;
1918   PetscScalar  *vals,*svals;
1919   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1920   MPI_Status   status;
1921   int          i,nz,ierr,j,rstart,rend,fd;
1922   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1923   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1924   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1925 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_MUMPS)
1926   PetscTruth   flag;
1927 #endif
1928 
1929   PetscFunctionBegin;
1930   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1931   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1932   if (!rank) {
1933     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1934     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1935     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1936     if (header[3] < 0) {
1937       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1938     }
1939   }
1940 
1941   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1942   M = header[1]; N = header[2];
1943   /* determine ownership of all rows */
1944   m = M/size + ((M % size) > rank);
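  /* each process gets M/size rows; the first M % size processes get one extra */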
1945   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1946   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1947   rowners[0] = 0;
1948   for (i=2; i<=size; i++) {
1949     rowners[i] += rowners[i-1];
1950   }
1951   rstart = rowners[rank];
1952   rend   = rowners[rank+1];
1953 
1954   /* distribute row lengths to all processors */
1955   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1956   offlens = ourlens + (rend-rstart);
1957   if (!rank) {
1958     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1959     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1960     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1961     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1962     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1963     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1964   } else {
1965     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1966   }
1967 
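  /* first pass over the file: process 0 computes each process's nonzero count,
     reads all the column indices, and ships each process its piece */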
1968   if (!rank) {
1969     /* calculate the number of nonzeros on each processor */
1970     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1971     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1972     for (i=0; i<size; i++) {
1973       for (j=rowners[i]; j< rowners[i+1]; j++) {
1974         procsnz[i] += rowlengths[j];
1975       }
1976     }
1977     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1978 
1979     /* determine max buffer needed and allocate it */
1980     maxnz = 0;
1981     for (i=0; i<size; i++) {
1982       maxnz = PetscMax(maxnz,procsnz[i]);
1983     }
1984     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1985 
1986     /* read in my part of the matrix column indices  */
1987     nz   = procsnz[0];
1988     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1989     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1990 
1991     /* read in everyone else's piece and ship it off */
1992     for (i=1; i<size; i++) {
1993       nz   = procsnz[i];
1994       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1995       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1996     }
1997     ierr = PetscFree(cols);CHKERRQ(ierr);
1998   } else {
1999     /* determine buffer space needed for message */
2000     nz = 0;
2001     for (i=0; i<m; i++) {
2002       nz += ourlens[i];
2003     }
2004     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
2005 
2006     /* receive message of column indices */
2007     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2008     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2009     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2010   }
2011 
2012   /* determine column ownership if matrix is not square */
2013   if (N != M) {
2014     n      = N/size + ((N % size) > rank);
2015     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2016     cstart = cend - n;
2017   } else {
2018     cstart = rstart;
2019     cend   = rend;
2020     n      = cend - cstart;
2021   }
2022 
2023   /* loop over local rows, determining number of off diagonal entries */
2024   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2025   jj = 0;
2026   for (i=0; i<m; i++) {
2027     for (j=0; j<ourlens[i]; j++) {
2028       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2029       jj++;
2030     }
2031   }
2032 
2033   /* create our matrix */
2034   for (i=0; i<m; i++) {
2035     ourlens[i] -= offlens[i];
2036   }
2037   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
2038   A = *newmat;
2039   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2040   for (i=0; i<m; i++) {
2041     ourlens[i] += offlens[i];
2042   }
2043 
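  /* second pass over the file: process 0 reads the numerical values and ships
     each process its piece; every process then inserts its own rows */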
2044   if (!rank) {
2045     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2046 
2047     /* read in my part of the matrix numerical values  */
2048     nz   = procsnz[0];
2049     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2050 
2051     /* insert into matrix */
2052     jj      = rstart;
2053     smycols = mycols;
2054     svals   = vals;
2055     for (i=0; i<m; i++) {
2056       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2057       smycols += ourlens[i];
2058       svals   += ourlens[i];
2059       jj++;
2060     }
2061 
2062     /* read in other processors and ship out */
2063     for (i=1; i<size; i++) {
2064       nz   = procsnz[i];
2065       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2066       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2067     }
2068     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2069   } else {
2070     /* receive numeric values */
2071     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2072 
2073     /* receive message of values */
2074     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2075     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2076     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2077 
2078     /* insert into matrix */
2079     jj      = rstart;
2080     smycols = mycols;
2081     svals   = vals;
2082     for (i=0; i<m; i++) {
2083       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2084       smycols += ourlens[i];
2085       svals   += ourlens[i];
2086       jj++;
2087     }
2088   }
2089   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2090   ierr = PetscFree(vals);CHKERRQ(ierr);
2091   ierr = PetscFree(mycols);CHKERRQ(ierr);
2092   ierr = PetscFree(rowners);CHKERRQ(ierr);
2093 
2094   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2095   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2096 #if defined(PETSC_HAVE_SPOOLES)
2097   ierr = PetscOptionsHasName(A->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
2098   if (flag) {
2099     if (size == 1) {
2100       ierr = MatUseSpooles_SeqAIJ(A);CHKERRQ(ierr);
2101     } else {
2102       ierr = MatUseSpooles_MPIAIJ(A);CHKERRQ(ierr);
2103     }
2104   }
2105 #endif
2106 #if defined(PETSC_HAVE_SUPERLUDIST)
2107   ierr = PetscOptionsHasName(A->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
2108   if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(A);CHKERRQ(ierr); }
2109 #endif
2110 #if defined(PETSC_HAVE_MUMPS)
2111   ierr = PetscOptionsHasName(A->prefix,"-mat_aij_mumps",&flag);CHKERRQ(ierr);
2112   if (flag) { ierr = MatUseMUMPS_MPIAIJ(A);CHKERRQ(ierr); }
2113 #endif
2114   PetscFunctionReturn(0);
2115 }
2116 EXTERN_C_END
2117 
2118 #undef __FUNCT__
2119 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2120 /*
2121     Not great since it makes two copies of the submatrix: first a local SeqAIJ,
2122   and then the end result obtained by concatenating the local matrices.
2123   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2124 */
2125 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2126 {
2127   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2128   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2129   Mat          *local,M,Mreuse;
2130   PetscScalar  *vwork,*aa;
2131   MPI_Comm     comm = mat->comm;
2132   Mat_SeqAIJ   *aij;
2133 
2134 
2135   PetscFunctionBegin;
2136   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2137   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2138 
2139   if (call ==  MAT_REUSE_MATRIX) {
2140     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2141     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2142     local = &Mreuse;
2143     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2144   } else {
2145     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2146     Mreuse = *local;
2147     ierr   = PetscFree(local);CHKERRQ(ierr);
2148   }
2149 
2150   /*
2151       m - number of local rows
2152       n - number of columns (same on all processors)
2153       rstart - first row in new global matrix generated
2154   */
2155   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2156   if (call == MAT_INITIAL_MATRIX) {
2157     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2158     ii  = aij->i;
2159     jj  = aij->j;
2160 
2161     /*
2162         Determine the number of non-zeros in the diagonal and off-diagonal
2163         portions of the matrix in order to do correct preallocation
2164     */
2165 
2166     /* first get start and end of "diagonal" columns */
2167     if (csize == PETSC_DECIDE) {
2168       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2169       if (mglobal == n) { /* square matrix */
2170         nlocal = m;
2171       } else {
2172         nlocal = n/size + ((n % size) > rank);
2173       }
2174     } else {
2175       nlocal = csize;
2176     }
2177     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
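    /* the inclusive prefix sum of nlocal gives one past this process's last
       "diagonal" column */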
2178     rstart = rend - nlocal;
2179     if (rank == size - 1 && rend != n) {
2180       SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
2181     }
2182 
2183     /* next, compute all the lengths */
2184     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2185     olens = dlens + m;
2186     for (i=0; i<m; i++) {
2187       jend = ii[i+1] - ii[i];
2188       olen = 0;
2189       dlen = 0;
2190       for (j=0; j<jend; j++) {
2191         if (*jj < rstart || *jj >= rend) olen++;
2192         else dlen++;
2193         jj++;
2194       }
2195       olens[i] = olen;
2196       dlens[i] = dlen;
2197     }
2198     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2199     ierr = PetscFree(dlens);CHKERRQ(ierr);
2200   } else {
2201     int ml,nl;
2202 
2203     M = *newmat;
2204     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2205     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2206     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2207     /*
2208          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2209        rather than the slower MatSetValues().
2210     */
2211     M->was_assembled = PETSC_TRUE;
2212     M->assembled     = PETSC_FALSE;
2213   }
2214   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2215   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2216   ii  = aij->i;
2217   jj  = aij->j;
2218   aa  = aij->a;
2219   for (i=0; i<m; i++) {
2220     row   = rstart + i;
2221     nz    = ii[i+1] - ii[i];
2222     cwork = jj;     jj += nz;
2223     vwork = aa;     aa += nz;
2224     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2225   }
2226 
2227   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2228   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2229   *newmat = M;
2230 
2231   /* save submatrix used in processor for next request */
2232   if (call ==  MAT_INITIAL_MATRIX) {
2233     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2234     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2235   }
2236 
2237   PetscFunctionReturn(0);
2238 }
2239 
2240 #undef __FUNCT__
2241 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2242 /*@C
2243    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2244    (the default parallel PETSc format).  For good matrix assembly performance
2245    the user should preallocate the matrix storage by setting the parameters
2246    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2247    performance can be increased by more than a factor of 50.
2248 
2249    Collective on MPI_Comm
2250 
2251    Input Parameters:
2252 +  A - the matrix
2253 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2254            (same value is used for all local rows)
2255 .  d_nnz - array containing the number of nonzeros in the various rows of the
2256            DIAGONAL portion of the local submatrix (possibly different for each row)
2257            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2258            The size of this array is equal to the number of local rows, i.e. 'm'.
2259            You must leave room for the diagonal entry even if it is zero.
2260 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2261            submatrix (same value is used for all local rows).
2262 -  o_nnz - array containing the number of nonzeros in the various rows of the
2263            OFF-DIAGONAL portion of the local submatrix (possibly different for
2264            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2265            structure. The size of this array is equal to the number
2266            of local rows, i.e. 'm'.
2267 
2268    The AIJ format (also called the Yale sparse matrix format or
2269    compressed row storage) is fully compatible with standard Fortran 77
2270    storage.  That is, the stored row and column indices can begin at
2271    either one (as in Fortran) or zero.  See the users manual for details.
2272 
2273    The user MUST specify either the local or global matrix dimensions
2274    (possibly both).
2275 
2276    The parallel matrix is partitioned such that the first m0 rows belong to
2277    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2278    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2279 
2280    The DIAGONAL portion of the local submatrix of a processor can be defined
2281    as the submatrix which is obtained by extracting the part corresponding
2282    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2283    first row that belongs to the processor, and r2 is the last row belonging
2284    to this processor. This is a square mxm matrix. The remaining portion
2285    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2286 
2287    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2288 
2289    By default, this format uses inodes (identical nodes) when possible.
2290    We search for consecutive rows with the same nonzero structure, thereby
2291    reusing matrix information to achieve increased efficiency.
2292 
2293    Options Database Keys:
2294 +  -mat_aij_no_inode  - Do not use inodes
2295 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2296 -  -mat_aij_oneindex - Internally use indexing starting at 1
2297         rather than 0.  Note that when calling MatSetValues(),
2298         the user still MUST index entries starting at 0!
2299 
2300    Example usage:
2301 
2302    Consider the following 8x8 matrix with 34 non-zero values, that is
2303    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2304    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2305    as follows:
2306 
2307 .vb
2308             1  2  0  |  0  3  0  |  0  4
2309     Proc0   0  5  6  |  7  0  0  |  8  0
2310             9  0 10  | 11  0  0  | 12  0
2311     -------------------------------------
2312            13  0 14  | 15 16 17  |  0  0
2313     Proc1   0 18  0  | 19 20 21  |  0  0
2314             0  0  0  | 22 23  0  | 24  0
2315     -------------------------------------
2316     Proc2  25 26 27  |  0  0 28  | 29  0
2317            30  0  0  | 31 32 33  |  0 34
2318 .ve
2319 
2320    This can be represented as a collection of submatrices as:
2321 
2322 .vb
2323       A B C
2324       D E F
2325       G H I
2326 .ve
2327 
2328    Where the submatrices A,B,C are owned by proc0, D,E,F are
2329    owned by proc1, G,H,I are owned by proc2.
2330 
2331    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2332    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2333    The 'M','N' parameters are 8,8, and have the same values on all procs.
2334 
2335    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2336    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2337    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2338    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2339    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2340    matrix, and [DF] as another SeqAIJ matrix.
2341 
2342    When d_nz, o_nz parameters are specified, d_nz storage elements are
2343    allocated for every row of the local diagonal submatrix, and o_nz
2344    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2345    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
2346    over the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices.
2347    In this case, the values of d_nz,o_nz are:
2348 .vb
2349      proc0 : d_nz = 2, o_nz = 2
2350      proc1 : d_nz = 3, o_nz = 2
2351      proc2 : d_nz = 1, o_nz = 4
2352 .ve
2353    We are allocating m*(d_nz+o_nz) storage locations on every proc. This
2354    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2355    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2356    34 values.
2357 
2358    When d_nnz, o_nnz parameters are specified, the storage is specified
2359    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2360    In the above case the values for d_nnz,o_nnz are:
2361 .vb
2362      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2363      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2364      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2365 .ve
2366    Here the space allocated is the sum of all the above values, i.e. 34, and
2367    hence the preallocation is exact.
2368 
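   A minimal call sketch (the communicator comm, the sizes m,n,M,N, and the
   constant per-row estimates 5 and 2 used here are illustrative assumptions,
   not values prescribed by this routine):
.vb
     Mat A;
     ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve
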
2369    Level: intermediate
2370 
2371 .keywords: matrix, aij, compressed row, sparse, parallel
2372 
2373 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2374 @*/
2375 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2376 {
2377   int ierr,(*f)(Mat,int,int*,int,int*);
2378 
2379   PetscFunctionBegin;
2380   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2381   if (f) {
2382     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2383   }
2384   PetscFunctionReturn(0);
2385 }
2386 
2387 #undef __FUNCT__
2388 #define __FUNCT__ "MatCreateMPIAIJ"
2389 /*@C
2390    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2391    (the default parallel PETSc format).  For good matrix assembly performance
2392    the user should preallocate the matrix storage by setting the parameters
2393    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2394    performance can be increased by more than a factor of 50.
2395 
2396    Collective on MPI_Comm
2397 
2398    Input Parameters:
2399 +  comm - MPI communicator
2400 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2401            This value should be the same as the local size used in creating the
2402            y vector for the matrix-vector product y = Ax.
2403 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2404        This value should be the same as the local size used in creating the x
2405        vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
2406 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2407 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2408 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2409            (same value is used for all local rows)
2410 .  d_nnz - array containing the number of nonzeros in the various rows of the
2411            DIAGONAL portion of the local submatrix (possibly different for each row)
2412            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2413            The size of this array is equal to the number of local rows, i.e. 'm'.
2414            You must leave room for the diagonal entry even if it is zero.
2415 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2416            submatrix (same value is used for all local rows).
2417 -  o_nnz - array containing the number of nonzeros in the various rows of the
2418            OFF-DIAGONAL portion of the local submatrix (possibly different for
2419            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2420            structure. The size of this array is equal to the number
2421            of local rows, i.e. 'm'.
2422 
2423    Output Parameter:
2424 .  A - the matrix
2425 
2426    Notes:
2427    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2428    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2429    storage requirements for this matrix.
2430 
2431    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2432    processor then it must be used on all processors that share the object for
2433    that argument.
2434 
2435    The AIJ format (also called the Yale sparse matrix format or
2436    compressed row storage) is fully compatible with standard Fortran 77
2437    storage.  That is, the stored row and column indices can begin at
2438    either one (as in Fortran) or zero.  See the users manual for details.
2439 
2440    The user MUST specify either the local or global matrix dimensions
2441    (possibly both).
2442 
2443    The parallel matrix is partitioned such that the first m0 rows belong to
2444    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2445    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2446 
2447    The DIAGONAL portion of the local submatrix of a processor can be defined
2448    as the submatrix which is obtained by extracting the part corresponding
2449    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2450    first row that belongs to the processor, and r2 is the last row belonging
2451    to this processor. This is a square mxm matrix. The remaining portion
2452    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2453 
2454    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2455 
2456    By default, this format uses inodes (identical nodes) when possible.
2457    We search for consecutive rows with the same nonzero structure, thereby
2458    reusing matrix information to achieve increased efficiency.
2459 
2460    Options Database Keys:
2461 +  -mat_aij_no_inode  - Do not use inodes
2462 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2463 -  -mat_aij_oneindex - Internally use indexing starting at 1
2464         rather than 0.  Note that when calling MatSetValues(),
2465         the user still MUST index entries starting at 0!
2466 
2467 
2468    Example usage:
2469 
2470    Consider the following 8x8 matrix with 34 non-zero values, that is
2471    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2472    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2473    as follows:
2474 
2475 .vb
2476             1  2  0  |  0  3  0  |  0  4
2477     Proc0   0  5  6  |  7  0  0  |  8  0
2478             9  0 10  | 11  0  0  | 12  0
2479     -------------------------------------
2480            13  0 14  | 15 16 17  |  0  0
2481     Proc1   0 18  0  | 19 20 21  |  0  0
2482             0  0  0  | 22 23  0  | 24  0
2483     -------------------------------------
2484     Proc2  25 26 27  |  0  0 28  | 29  0
2485            30  0  0  | 31 32 33  |  0 34
2486 .ve
2487 
2488    This can be represented as a collection of submatrices as:
2489 
2490 .vb
2491       A B C
2492       D E F
2493       G H I
2494 .ve
2495 
2496    Where the submatrices A,B,C are owned by proc0, D,E,F are
2497    owned by proc1, G,H,I are owned by proc2.
2498 
2499    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2500    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2501    The 'M','N' parameters are 8,8, and have the same values on all procs.
2502 
2503    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2504    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2505    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2506    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2507    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2508    matrix, and [DF] as another SeqAIJ matrix.
2509 
2510    When d_nz, o_nz parameters are specified, d_nz storage elements are
2511    allocated for every row of the local diagonal submatrix, and o_nz
2512    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2513    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
2514    over the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices.
2515    In this case, the values of d_nz,o_nz are:
2516 .vb
2517      proc0 : d_nz = 2, o_nz = 2
2518      proc1 : d_nz = 3, o_nz = 2
2519      proc2 : d_nz = 1, o_nz = 4
2520 .ve
2521    We are allocating m*(d_nz+o_nz) storage locations on every proc. This
2522    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2523    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2524    34 values.
2525 
2526    When d_nnz, o_nnz parameters are specified, the storage is specified
2527    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2528    In the above case the values for d_nnz,o_nnz are:
2529 .vb
2530      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2531      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2532      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2533 .ve
2534    Here the space allocated is the sum of all the above values, i.e. 34, and
2535    hence the preallocation is exact.
2536 
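   As a concrete sketch, proc0 of the 8x8 example above could create the matrix
   with the d_nnz/o_nnz values listed there (the other processes pass their own
   local sizes and counts; comm and A are assumed declared elsewhere):
.vb
     int d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
     ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
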
2537    Level: intermediate
2538 
2539 .keywords: matrix, aij, compressed row, sparse, parallel
2540 
2541 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2542 @*/
2543 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2544 {
2545   int ierr,size;
2546 
2547   PetscFunctionBegin;
2548   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2549   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2550   if (size > 1) {
2551     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2552     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2553   } else {
2554     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2555     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2556   }
2557   PetscFunctionReturn(0);
2558 }
2559 
2560 #undef __FUNCT__
2561 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
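/*
    Gives access to the pieces of the parallel matrix: Ad is the local diagonal
  block, Ao the local off-diagonal block, and colmap maps the (compressed)
  local columns of Ao to global column numbers.
*/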
2562 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2563 {
2564   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2565   PetscFunctionBegin;
2566   *Ad     = a->A;
2567   *Ao     = a->B;
2568   *colmap = a->garray;
2569   PetscFunctionReturn(0);
2570 }
2571 
2572 #undef __FUNCT__
2573 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2574 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2575 {
2576   int        ierr,i;
2577   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2578 
2579   PetscFunctionBegin;
2580   if (coloring->ctype == IS_COLORING_LOCAL) {
2581     ISColoringValue *allcolors,*colors;
2582     ISColoring      ocoloring;
2583 
2584     /* set coloring for diagonal portion */
2585     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2586 
2587     /* set coloring for off-diagonal portion */
2588     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
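    /* a->garray[i] is the global column number of local off-diagonal column i,
       so the gathered array of global colors can be indexed directly */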
2589     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2590     for (i=0; i<a->B->n; i++) {
2591       colors[i] = allcolors[a->garray[i]];
2592     }
2593     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2594     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2595     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2596     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2597   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2598     ISColoringValue *colors;
2599     int             *larray;
2600     ISColoring      ocoloring;
2601 
2602     /* set coloring for diagonal portion */
2603     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2604     for (i=0; i<a->A->n; i++) {
2605       larray[i] = i + a->cstart;
2606     }
2607     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2608     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2609     for (i=0; i<a->A->n; i++) {
2610       colors[i] = coloring->colors[larray[i]];
2611     }
2612     ierr = PetscFree(larray);CHKERRQ(ierr);
2613     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2614     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2615     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2616 
2617     /* set coloring for off-diagonal portion */
2618     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2619     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2620     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2621     for (i=0; i<a->B->n; i++) {
2622       colors[i] = coloring->colors[larray[i]];
2623     }
2624     ierr = PetscFree(larray);CHKERRQ(ierr);
2625     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2626     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2627     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2628   } else {
2629     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2630   }
2631 
2632   PetscFunctionReturn(0);
2633 }
2634 
2635 #undef __FUNCT__
2636 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2637 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2638 {
2639   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2640   int        ierr;
2641 
2642   PetscFunctionBegin;
2643   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2644   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2645   PetscFunctionReturn(0);
2646 }
2647 
2648 #undef __FUNCT__
2649 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2650 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2651 {
2652   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2653   int        ierr;
2654 
2655   PetscFunctionBegin;
2656   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2657   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2658   PetscFunctionReturn(0);
2659 }
2660 
2661 #undef __FUNCT__
2662 #define __FUNCT__ "MatMerge"
2663 /*@C
2664       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2665                  matrices from each processor
2666 
2667     Collective on MPI_Comm
2668 
2669    Input Parameters:
2670 +    comm - the communicator the parallel matrix will live on
2671 -    inmat - the input sequential matrices
2672 
2673    Output Parameter:
2674 .    outmat - the parallel matrix generated
2675 
2676     Level: advanced
2677 
2678    Notes: The number of columns of the matrix in EACH of the separate matrices
2679       MUST be the same.
2680 
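   A usage sketch (seqmat and parmat are illustrative names; each process passes
   its own sequential matrix, and note that inmat is destroyed by this routine):
.vb
     ierr = MatMerge(comm,seqmat,&parmat);CHKERRQ(ierr);
.ve
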
2681 @*/
2682 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
2683 {
2684   int         ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
2685   PetscScalar *values;
2686   PetscMap    columnmap,rowmap;
2687 
2688   PetscFunctionBegin;
2689 
2690   ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);
2691 
2692   /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2693   ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2694   ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
2695   ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2696   ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2697   ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2698 
2699   ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2700   ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2701   ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2702   ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2703   ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2704 
2705   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2706   for (i=0;i<m;i++) {
2707     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2708     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2709     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2710   }
2711   ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr);
2712   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2713 
2714   for (i=0;i<m;i++) {
2715     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2716     I    = i + rstart;
2717     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2718     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2719   }
2720   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2721   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2722   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2723 
2724   PetscFunctionReturn(0);
2725 }
2726 
2727 #undef __FUNCT__
2728 #define __FUNCT__ "MatFileSplit"
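/*
    Writes each process's local rows of A, as a sequential matrix, to the
  binary file <outfile>.<rank>.
*/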
2729 int MatFileSplit(Mat A,char *outfile)
2730 {
2731   int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
2732   PetscViewer out;
2733   char        *name;
2734   Mat         B;
2735   PetscScalar *values;
2736 
2737   PetscFunctionBegin;
2738 
2739   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2740   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2741   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
2742   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2743   for (i=0;i<m;i++) {
2744     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2745     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2746     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2747   }
2748   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2749   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2750 
2751   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2752   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2753   ierr = PetscMalloc((len+16)*sizeof(char),&name);CHKERRQ(ierr); /* room for the ".<rank>" suffix for any int rank */
2754   sprintf(name,"%s.%d",outfile,rank);
2755   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
2756   ierr = PetscFree(name);CHKERRQ(ierr);
2757   ierr = MatView(B,out);CHKERRQ(ierr);
2758   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2759   ierr = MatDestroy(B);CHKERRQ(ierr);
2760   PetscFunctionReturn(0);
2761 }
2762