xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision c3d102fe2dfc19c1832704c537829f04ceb136a2)
1 
2 #include "src/mat/impls/aij/mpi/mpiaij.h"
3 #include "src/inline/spops.h"
4 
5 /*
6   Local utility routine that creates a mapping from the global column
7 number to the local number in the off-diagonal part of the local
8 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
9 a slightly higher hash-table cost; without it, it is not scalable (each processor
10 holds an order-N integer array) but access is fast.
11 */
12 #undef __FUNCT__
13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
15 {
16   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
17   PetscErrorCode ierr;
18   PetscInt       n = aij->B->n,i;
19 
20   PetscFunctionBegin;
21 #if defined (PETSC_USE_CTABLE)
22   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
23   for (i=0; i<n; i++){
24     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
25   }
26 #else
27   ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
28   ierr = PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
29   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
30   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
31 #endif
32   PetscFunctionReturn(0);
33 }
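
/*
   Example (not part of the original source): the inverse map built above is
   consulted later in this file, e.g. in MatSetValues_MPIAIJ() and
   MatGetValues_MPIAIJ().  A minimal sketch of the lookup, assuming a
   Mat_MPIAIJ *aij and a global column index gcol are in scope; a result of
   -1 after the decrement means the column is not in the off-diagonal part:

      PetscInt lcol;
   #if defined (PETSC_USE_CTABLE)
      ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
      lcol--;
   #else
      lcol = aij->colmap[gcol] - 1;
   #endif
*/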
34 
35 #define CHUNKSIZE   15
36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
37 { \
38  \
39     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
40     rmax = aimax[row]; nrow = ailen[row];  \
41     col1 = col - shift; \
42      \
43     low = 0; high = nrow; \
44     while (high-low > 5) { \
45       t = (low+high)/2; \
46       if (rp[t] > col) high = t; \
47       else             low  = t; \
48     } \
49       for (_i=low; _i<high; _i++) { \
50         if (rp[_i] > col1) break; \
51         if (rp[_i] == col1) { \
52           if (addv == ADD_VALUES) ap[_i] += value;   \
53           else                    ap[_i] = value; \
54           goto a_noinsert; \
55         } \
56       }  \
57       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
58       if (nonew == 1) goto a_noinsert; \
59       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
60       if (nrow >= rmax) { \
61         /* there is no extra room in row, therefore enlarge */ \
62         PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
63         PetscScalar *new_a; \
64  \
65         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
66  \
67         /* malloc new storage space */ \
68         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
69         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
70         new_j   = (PetscInt*)(new_a + new_nz); \
71         new_i   = new_j + new_nz; \
72  \
73         /* copy over old data into new slots */ \
74         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
75         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
76         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
77         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
78         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
79                                                            len*sizeof(PetscInt));CHKERRQ(ierr); \
80         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
81         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
82                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
83         /* free up old matrix storage */ \
84  \
85         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
86         if (!a->singlemalloc) { \
87            ierr = PetscFree(a->i);CHKERRQ(ierr); \
88            ierr = PetscFree(a->j);CHKERRQ(ierr); \
89         } \
90         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
91         a->singlemalloc = PETSC_TRUE; \
92  \
93         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
94         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
95         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar)));CHKERRQ(ierr); \
96         a->maxnz += CHUNKSIZE; \
97         a->reallocs++; \
98       } \
99       N = nrow++ - 1; a->nz++; \
100       /* shift up all the later entries in this row */ \
101       for (ii=N; ii>=_i; ii--) { \
102         rp[ii+1] = rp[ii]; \
103         ap[ii+1] = ap[ii]; \
104       } \
105       rp[_i] = col1;  \
106       ap[_i] = value;  \
107       a_noinsert: ; \
108       ailen[row] = nrow; \
109 }
110 
111 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
112 { \
113  \
114     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
115     rmax = bimax[row]; nrow = bilen[row];  \
116     col1 = col - shift; \
117      \
118     low = 0; high = nrow; \
119     while (high-low > 5) { \
120       t = (low+high)/2; \
121       if (rp[t] > col) high = t; \
122       else             low  = t; \
123     } \
124        for (_i=low; _i<high; _i++) { \
125         if (rp[_i] > col1) break; \
126         if (rp[_i] == col1) { \
127           if (addv == ADD_VALUES) ap[_i] += value;   \
128           else                    ap[_i] = value; \
129           goto b_noinsert; \
130         } \
131       }  \
132       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
133       if (nonew == 1) goto b_noinsert; \
134       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
135       if (nrow >= rmax) { \
136         /* there is no extra room in row, therefore enlarge */ \
137         PetscInt    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
138         PetscScalar *new_a; \
139  \
140         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
141  \
142         /* malloc new storage space */ \
143         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \
144         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
145         new_j   = (PetscInt*)(new_a + new_nz); \
146         new_i   = new_j + new_nz; \
147  \
148         /* copy over old data into new slots */ \
149         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
150         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
151         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
152         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
153         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
154                                                            len*sizeof(PetscInt));CHKERRQ(ierr); \
155         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
156         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
157                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
158         /* free up old matrix storage */ \
159  \
160         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
161         if (!b->singlemalloc) { \
162           ierr = PetscFree(b->i);CHKERRQ(ierr); \
163           ierr = PetscFree(b->j);CHKERRQ(ierr); \
164         } \
165         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
166         b->singlemalloc = PETSC_TRUE; \
167  \
168         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
169         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
170         ierr = PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar)));CHKERRQ(ierr); \
171         b->maxnz += CHUNKSIZE; \
172         b->reallocs++; \
173       } \
174       N = nrow++ - 1; b->nz++; \
175       /* shift up all the later entries in this row */ \
176       for (ii=N; ii>=_i; ii--) { \
177         rp[ii+1] = rp[ii]; \
178         ap[ii+1] = ap[ii]; \
179       } \
180       rp[_i] = col1;  \
181       ap[_i] = value;  \
182       b_noinsert: ; \
183       bilen[row] = nrow; \
184 }
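
/*
   Both macros above implement the same insertion scheme: a short binary
   search narrows the window, a linear scan finds the exact slot, and the
   later entries of the row are shifted up one place to make room.  Below is
   a standalone sketch of that scheme (it ignores the reallocation branch and
   the ignorezeroentries/nonew options); the names are illustrative only and
   not part of the PETSc API:

      static void InsertIntoSortedRow(PetscInt *rp,PetscScalar *ap,PetscInt *nrow,
                                      PetscInt col,PetscScalar value,InsertMode addv)
      {
        PetscInt low = 0,high = *nrow,t,i;
        while (high-low > 5) {                  -- binary search phase
          t = (low+high)/2;
          if (rp[t] > col) high = t;
          else             low  = t;
        }
        for (i=low; i<high; i++) {              -- linear scan phase
          if (rp[i] > col) break;
          if (rp[i] == col) {                   -- existing entry: add or overwrite
            if (addv == ADD_VALUES) ap[i] += value;
            else                    ap[i]  = value;
            return;
          }
        }
        for (t=*nrow-1; t>=i; t--) {            -- shift later entries up one slot
          rp[t+1] = rp[t];
          ap[t+1] = ap[t];
        }
        rp[i] = col; ap[i] = value; (*nrow)++;
      }
*/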
185 
186 #undef __FUNCT__
187 #define __FUNCT__ "MatSetValues_MPIAIJ"
188 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
189 {
190   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
191   PetscScalar    value;
192   PetscErrorCode ierr;
193   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
194   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
195   PetscTruth     roworiented = aij->roworiented;
196 
197   /* Some variables required by the MatSetValues_SeqAIJ_*_Private() macros */
198   Mat            A = aij->A;
199   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
200   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
201   PetscScalar    *aa = a->a;
202   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
203   Mat            B = aij->B;
204   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
205   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
206   PetscScalar    *ba = b->a;
207 
208   PetscInt       *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
209   PetscInt       nonew = a->nonew,shift=0;
210   PetscScalar    *ap;
211 
212   PetscFunctionBegin;
213   for (i=0; i<m; i++) {
214     if (im[i] < 0) continue;
215 #if defined(PETSC_USE_DEBUG)
216     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
217 #endif
218     if (im[i] >= rstart && im[i] < rend) {
219       row = im[i] - rstart;
220       for (j=0; j<n; j++) {
221         if (in[j] >= cstart && in[j] < cend){
222           col = in[j] - cstart;
223           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
224           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
225           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
226           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
227         } else if (in[j] < 0) continue;
228 #if defined(PETSC_USE_DEBUG)
229         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
230 #endif
231         else {
232           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
233           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
234           if (mat->was_assembled) {
235             if (!aij->colmap) {
236               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
237             }
238 #if defined (PETSC_USE_CTABLE)
239             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
240 	    col--;
241 #else
242             col = aij->colmap[in[j]] - 1;
243 #endif
244             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
245               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
246               col =  in[j];
247               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
248               B = aij->B;
249               b = (Mat_SeqAIJ*)B->data;
250               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
251               ba = b->a;
252             }
253           } else col = in[j];
254           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
255           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
256         }
257       }
258     } else {
259       if (!aij->donotstash) {
260         if (roworiented) {
261           if (ignorezeroentries && v[i*n] == 0.0) continue;
262           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
263         } else {
264           if (ignorezeroentries && v[i] == 0.0) continue;
265           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
266         }
267       }
268     }
269   }
270   PetscFunctionReturn(0);
271 }
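
/*
   For reference (not part of the original source): the routine above is
   reached through the generic MatSetValues() interface.  Locally owned rows
   go straight into aij->A (diagonal block) or aij->B (off-diagonal block),
   while rows owned by other processes are stashed and only communicated
   during assembly.  A minimal usage sketch, with arbitrary sizes and with
   error handling abbreviated:

      Mat         A;
      PetscInt    row = 0,cols[2] = {0,1};
      PetscScalar vals[2] = {2.0,-1.0};

      ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,10,10,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,3,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatDestroy(A);CHKERRQ(ierr);
*/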
272 
273 #undef __FUNCT__
274 #define __FUNCT__ "MatGetValues_MPIAIJ"
275 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
276 {
277   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
278   PetscErrorCode ierr;
279   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
280   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
281 
282   PetscFunctionBegin;
283   for (i=0; i<m; i++) {
284     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
285     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
286     if (idxm[i] >= rstart && idxm[i] < rend) {
287       row = idxm[i] - rstart;
288       for (j=0; j<n; j++) {
289         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
290         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
291         if (idxn[j] >= cstart && idxn[j] < cend){
292           col = idxn[j] - cstart;
293           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
294         } else {
295           if (!aij->colmap) {
296             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
297           }
298 #if defined (PETSC_USE_CTABLE)
299           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
300           col --;
301 #else
302           col = aij->colmap[idxn[j]] - 1;
303 #endif
304           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
305           else {
306             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
307           }
308         }
309       }
310     } else {
311       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
312     }
313   }
314   PetscFunctionReturn(0);
315 }
316 
317 #undef __FUNCT__
318 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
319 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
320 {
321   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
322   PetscErrorCode ierr;
323   PetscInt       nstash,reallocs;
324   InsertMode     addv;
325 
326   PetscFunctionBegin;
327   if (aij->donotstash) {
328     PetscFunctionReturn(0);
329   }
330 
331   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
332   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
333   if (addv == (ADD_VALUES|INSERT_VALUES)) {
334     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
335   }
336   mat->insertmode = addv; /* in case this processor had no cache */
337 
338   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
339   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
340   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
341   PetscFunctionReturn(0);
342 }
343 
344 #undef __FUNCT__
345 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
346 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
347 {
348   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
349   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
350   PetscErrorCode ierr;
351   PetscMPIInt    n;
352   PetscInt       i,j,rstart,ncols,flg;
353   PetscInt       *row,*col,other_disassembled;
354   PetscScalar    *val;
355   InsertMode     addv = mat->insertmode;
356 
357   PetscFunctionBegin;
358   if (!aij->donotstash) {
359     while (1) {
360       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
361       if (!flg) break;
362 
363       for (i=0; i<n;) {
364         /* Now identify the consecutive vals belonging to the same row */
365         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
366         if (j < n) ncols = j-i;
367         else       ncols = n-i;
368         /* Now assemble all these values with a single function call */
369         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
370         i = j;
371       }
372     }
373     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
374   }
375   a->compressedrow.use     = PETSC_FALSE;
376   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
377   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
378 
379   /* determine if any processor has disassembled; if so we must
380      also disassemble ourselves, in order that we may reassemble. */
381   /*
382      if the nonzero structure of submatrix B cannot change then we know that
383      no processor disassembled, thus we can skip this stuff
384   */
385   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
386     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
387     if (mat->was_assembled && !other_disassembled) {
388       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
389       /* reaccess the b because aij->B was changed */
390       b    = (Mat_SeqAIJ *)aij->B->data;
391     }
392   }
393 
394   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
395     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
396   }
397 
398   b->inode.use         = PETSC_FALSE;
399   b->compressedrow.use = PETSC_TRUE;
400   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
401   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
402 
403   if (aij->rowvalues) {
404     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
405     aij->rowvalues = 0;
406   }
407 
408   /* used by MatAXPY() */
409   a->xtoy = 0; b->xtoy = 0;
410   a->XtoY = 0; b->XtoY = 0;
411 
412   PetscFunctionReturn(0);
413 }
414 
415 #undef __FUNCT__
416 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
417 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
418 {
419   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
420   PetscErrorCode ierr;
421 
422   PetscFunctionBegin;
423   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
424   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
425   PetscFunctionReturn(0);
426 }
427 
428 #undef __FUNCT__
429 #define __FUNCT__ "MatZeroRows_MPIAIJ"
430 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
431 {
432   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
433   PetscErrorCode ierr;
434   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
435   PetscInt       i,N,*rows,*owners = l->rowners;
436   PetscInt       *nprocs,j,idx,nsends,row;
437   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
438   PetscInt       *rvalues,count,base,slen,*source;
439   PetscInt       *lens,*lrows,*values,rstart=l->rstart;
440   MPI_Comm       comm = A->comm;
441   MPI_Request    *send_waits,*recv_waits;
442   MPI_Status     recv_status,*send_status;
443   IS             istmp;
444 #if defined(PETSC_DEBUG)
445   PetscTruth     found = PETSC_FALSE;
446 #endif
447 
448   PetscFunctionBegin;
449   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
450   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
451 
452   /*  first count number of contributors to each processor */
453   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
454   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
455   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
456   j = 0;
457   for (i=0; i<N; i++) {
458     if (lastidx > (idx = rows[i])) j = 0;
459     lastidx = idx;
460     for (; j<size; j++) {
461       if (idx >= owners[j] && idx < owners[j+1]) {
462         nprocs[2*j]++;
463         nprocs[2*j+1] = 1;
464         owner[i] = j;
465 #if defined(PETSC_DEBUG)
466         found = PETSC_TRUE;
467 #endif
468         break;
469       }
470     }
471 #if defined(PETSC_DEBUG)
472     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
473     found = PETSC_FALSE;
474 #endif
475   }
476   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
477 
478   /* inform other processors of number of messages and max length*/
479   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
480 
481   /* post receives:   */
482   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
483   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
484   for (i=0; i<nrecvs; i++) {
485     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
486   }
487 
488   /* do sends:
489       1) starts[i] gives the starting index in svalues for stuff going to
490          the ith processor
491   */
492   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
493   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
494   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
495   starts[0] = 0;
496   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
497   for (i=0; i<N; i++) {
498     svalues[starts[owner[i]]++] = rows[i];
499   }
500   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
501 
502   starts[0] = 0;
503   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
504   count = 0;
505   for (i=0; i<size; i++) {
506     if (nprocs[2*i+1]) {
507       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
508     }
509   }
510   ierr = PetscFree(starts);CHKERRQ(ierr);
511 
512   base = owners[rank];
513 
514   /*  wait on receives */
515   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
516   source = lens + nrecvs;
517   count  = nrecvs; slen = 0;
518   while (count) {
519     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
520     /* unpack receives into our local space */
521     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
522     source[imdex]  = recv_status.MPI_SOURCE;
523     lens[imdex]    = n;
524     slen          += n;
525     count--;
526   }
527   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
528 
529   /* move the data into the send scatter */
530   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
531   count = 0;
532   for (i=0; i<nrecvs; i++) {
533     values = rvalues + i*nmax;
534     for (j=0; j<lens[i]; j++) {
535       lrows[count++] = values[j] - base;
536     }
537   }
538   ierr = PetscFree(rvalues);CHKERRQ(ierr);
539   ierr = PetscFree(lens);CHKERRQ(ierr);
540   ierr = PetscFree(owner);CHKERRQ(ierr);
541   ierr = PetscFree(nprocs);CHKERRQ(ierr);
542 
543   /* actually zap the local rows */
544   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
545   ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr);
546 
547   /*
548         Zero the required rows. If the "diagonal block" of the matrix
549      is square and the user wishes to set the diagonal we use separate
550      code so that MatSetValues() is not called for each diagonal entry,
551      which would allocate new memory and trigger many mallocs, slowing things down.
552 
553        Contributed by: Mathew Knepley
554   */
555   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
556   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
557   if (diag && (l->A->M == l->A->N)) {
558     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
559   } else if (diag) {
560     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
561     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
562       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
563 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
564     }
565     for (i = 0; i < slen; i++) {
566       row  = lrows[i] + rstart;
567       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
568     }
569     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
570     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
571   } else {
572     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
573   }
574   ierr = ISDestroy(istmp);CHKERRQ(ierr);
575   ierr = PetscFree(lrows);CHKERRQ(ierr);
576 
577   /* wait on sends */
578   if (nsends) {
579     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
580     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
581     ierr = PetscFree(send_status);CHKERRQ(ierr);
582   }
583   ierr = PetscFree(send_waits);CHKERRQ(ierr);
584   ierr = PetscFree(svalues);CHKERRQ(ierr);
585 
586   PetscFunctionReturn(0);
587 }
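
/*
   Usage sketch (not part of the original source): each process may pass any
   global row numbers to MatZeroRows(); the routine above forwards them to
   their owning processes before zeroing.  The diagonal argument, if non-null,
   is placed on the diagonal of every zeroed row.  Names and sizes here are
   illustrative only:

      IS          is;
      PetscInt    rows[2] = {0,5};
      PetscScalar diag = 1.0;

      ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is);CHKERRQ(ierr);
      ierr = MatZeroRows(A,is,&diag);CHKERRQ(ierr);    -- pass 0 to leave the rows entirely zero
      ierr = ISDestroy(is);CHKERRQ(ierr);
*/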
588 
589 #undef __FUNCT__
590 #define __FUNCT__ "MatMult_MPIAIJ"
591 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
592 {
593   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
594   PetscErrorCode ierr;
595   PetscInt       nt;
596 
597   PetscFunctionBegin;
598   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
599   if (nt != A->n) {
600     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
601   }
602   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
603   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
604   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
605   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
606   PetscFunctionReturn(0);
607 }
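
/*
   The multiply above computes, on each process,

      y_local = A_d * x_local + A_o * x_ghost

   where A_d (a->A) holds the locally owned columns and A_o (a->B) the
   remaining ones.  The VecScatterBegin()/VecScatterEnd() pair gathers the
   ghost values x_ghost into a->lvec, and the diagonal-block product is done
   between the two calls so that communication overlaps with computation.
*/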
608 
609 #undef __FUNCT__
610 #define __FUNCT__ "MatMultAdd_MPIAIJ"
611 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
612 {
613   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
614   PetscErrorCode ierr;
615 
616   PetscFunctionBegin;
617   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
618   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
619   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
620   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
621   PetscFunctionReturn(0);
622 }
623 
624 #undef __FUNCT__
625 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
626 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
627 {
628   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
629   PetscErrorCode ierr;
630   PetscTruth     merged;
631 
632   PetscFunctionBegin;
633   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
634   /* do nondiagonal part */
635   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
636   if (!merged) {
637     /* send it on its way */
638     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
639     /* do local part */
640     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
641     /* receive remote parts: note this assumes the values are not actually
642        added into yy until the VecScatterEnd() below */
643     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
644   } else {
645     /* do local part */
646     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
647     /* send it on its way */
648     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
649     /* values actually were received in the Begin() but we need to call this nop */
650     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
651   }
652   PetscFunctionReturn(0);
653 }
654 
655 EXTERN_C_BEGIN
656 #undef __FUNCT__
657 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
658 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
659 {
660   MPI_Comm       comm;
661   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
662   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
663   IS             Me,Notme;
664   PetscErrorCode ierr;
665   PetscInt       M,N,first,last,*notme,i;
666   PetscMPIInt    size;
667 
668   PetscFunctionBegin;
669 
670   /* Easy test: symmetric diagonal block */
671   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
672   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
673   if (!*f) PetscFunctionReturn(0);
674   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
675   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
676   if (size == 1) PetscFunctionReturn(0);
677 
678   /* Hard test: off-diagonal block. This requires a call to MatGetSubMatrices(). */
679   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
680   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
681   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
682   for (i=0; i<first; i++) notme[i] = i;
683   for (i=last; i<M; i++) notme[i-last+first] = i;
684   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
685   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
686   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
687   Aoff = Aoffs[0];
688   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
689   Boff = Boffs[0];
690   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
691   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
692   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
693   ierr = ISDestroy(Me);CHKERRQ(ierr);
694   ierr = ISDestroy(Notme);CHKERRQ(ierr);
695 
696   PetscFunctionReturn(0);
697 }
698 EXTERN_C_END
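
/*
   In summary, the test above checks A == B^T in two stages that never form a
   transpose explicitly:

      A == B^T   iff   A_d == (B_d)^T on every process, and
                       A(me,notme) == ( B(notme,me) )^T,

   where "me" are the locally owned rows/columns and "notme" the rest.  The
   second comparison is done by extracting the complementary submatrices with
   MatGetSubMatrices() and applying the sequential test to those.
*/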
699 
700 #undef __FUNCT__
701 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
702 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
703 {
704   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
705   PetscErrorCode ierr;
706 
707   PetscFunctionBegin;
708   /* do nondiagonal part */
709   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
710   /* send it on its way */
711   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
712   /* do local part */
713   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
714   /* receive remote parts */
715   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
716   PetscFunctionReturn(0);
717 }
718 
719 /*
720   This only works correctly for square matrices where the subblock A->A is the
721    diagonal block
722 */
723 #undef __FUNCT__
724 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
725 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
726 {
727   PetscErrorCode ierr;
728   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
729 
730   PetscFunctionBegin;
731   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
732   if (a->rstart != a->cstart || a->rend != a->cend) {
733     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
734   }
735   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
736   PetscFunctionReturn(0);
737 }
738 
739 #undef __FUNCT__
740 #define __FUNCT__ "MatScale_MPIAIJ"
741 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
742 {
743   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
744   PetscErrorCode ierr;
745 
746   PetscFunctionBegin;
747   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
748   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
749   PetscFunctionReturn(0);
750 }
751 
752 #undef __FUNCT__
753 #define __FUNCT__ "MatDestroy_MPIAIJ"
754 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
755 {
756   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
757   PetscErrorCode ierr;
758 
759   PetscFunctionBegin;
760 #if defined(PETSC_USE_LOG)
761   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
762 #endif
763   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
764   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
765   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
766   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
767 #if defined (PETSC_USE_CTABLE)
768   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
769 #else
770   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
771 #endif
772   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
773   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
774   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
775   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
776   ierr = PetscFree(aij);CHKERRQ(ierr);
777 
778   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
779   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
780   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
781   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
782   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
783   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
784   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
785   PetscFunctionReturn(0);
786 }
787 
788 #undef __FUNCT__
789 #define __FUNCT__ "MatView_MPIAIJ_Binary"
790 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
791 {
792   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
793   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
794   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
795   PetscErrorCode    ierr;
796   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
797   int               fd;
798   PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
799   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
800   PetscScalar       *column_values;
801 
802   PetscFunctionBegin;
803   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
804   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
805   nz   = A->nz + B->nz;
806   if (!rank) {
807     header[0] = MAT_FILE_COOKIE;
808     header[1] = mat->M;
809     header[2] = mat->N;
810     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
811     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
812     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
813     /* get largest number of rows any processor has */
814     rlen = mat->m;
815     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
816     for (i=1; i<size; i++) {
817       rlen = PetscMax(rlen,range[i+1] - range[i]);
818     }
819   } else {
820     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
821     rlen = mat->m;
822   }
823 
824   /* load up the local row counts */
825   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
826   for (i=0; i<mat->m; i++) {
827     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
828   }
829 
830   /* store the row lengths to the file */
831   if (!rank) {
832     MPI_Status status;
833     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
834     for (i=1; i<size; i++) {
835       rlen = range[i+1] - range[i];
836       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
837       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
838     }
839   } else {
840     ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
841   }
842   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
843 
844   /* load up the local column indices */
845   nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
846   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
847   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
848   cnt  = 0;
849   for (i=0; i<mat->m; i++) {
850     for (j=B->i[i]; j<B->i[i+1]; j++) {
851       if ( (col = garray[B->j[j]]) > cstart) break;
852       column_indices[cnt++] = col;
853     }
854     for (k=A->i[i]; k<A->i[i+1]; k++) {
855       column_indices[cnt++] = A->j[k] + cstart;
856     }
857     for (; j<B->i[i+1]; j++) {
858       column_indices[cnt++] = garray[B->j[j]];
859     }
860   }
861   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
862 
863   /* store the column indices to the file */
864   if (!rank) {
865     MPI_Status status;
866     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
867     for (i=1; i<size; i++) {
868       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
869       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
870       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
871       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
872     }
873   } else {
874     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
875     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
876   }
877   ierr = PetscFree(column_indices);CHKERRQ(ierr);
878 
879   /* load up the local column values */
880   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
881   cnt  = 0;
882   for (i=0; i<mat->m; i++) {
883     for (j=B->i[i]; j<B->i[i+1]; j++) {
884       if ( garray[B->j[j]] > cstart) break;
885       column_values[cnt++] = B->a[j];
886     }
887     for (k=A->i[i]; k<A->i[i+1]; k++) {
888       column_values[cnt++] = A->a[k];
889     }
890     for (; j<B->i[i+1]; j++) {
891       column_values[cnt++] = B->a[j];
892     }
893   }
894   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
895 
896   /* store the column values to the file */
897   if (!rank) {
898     MPI_Status status;
899     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
900     for (i=1; i<size; i++) {
901       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
902       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
903       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
904       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
905     }
906   } else {
907     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
908     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
909   }
910   ierr = PetscFree(column_values);CHKERRQ(ierr);
911   PetscFunctionReturn(0);
912 }
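
/*
   File layout written above (all indices are PetscInt, all values are
   PetscScalar, and everything is funneled through rank 0):

      header[4]         = { MAT_FILE_COOKIE, M, N, global number of nonzeros }
      row_lengths[M]    = nonzeros per global row
      column_indices[]  = global column indices, row by row, with the
                          diagonal-block entries merged into the off-diagonal
                          entries in increasing column order
      column_values[]   = the matching numerical values, in the same order

   Remote processes send their rows to rank 0 one process at a time, so the
   buffers stay proportional to the largest local part rather than to the
   whole matrix.  A file in this format can later be read back with MatLoad()
   on a binary viewer.
*/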
913 
914 #undef __FUNCT__
915 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
916 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
917 {
918   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
919   PetscErrorCode    ierr;
920   PetscMPIInt       rank = aij->rank,size = aij->size;
921   PetscTruth        isdraw,iascii,isbinary;
922   PetscViewer       sviewer;
923   PetscViewerFormat format;
924 
925   PetscFunctionBegin;
926   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
927   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
928   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
929   if (iascii) {
930     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
931     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
932       MatInfo    info;
933       Mat_SeqAIJ *a = (Mat_SeqAIJ*) aij->A->data;
934       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
935       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
936       if (!a->inode.size) {
937         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
938 					      rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
939       } else {
940         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
941 		    rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
942       }
943       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
944       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
945       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
946       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
947       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
948       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
949       PetscFunctionReturn(0);
950     } else if (format == PETSC_VIEWER_ASCII_INFO) {
951       Mat_SeqAIJ *a = (Mat_SeqAIJ*) aij->A->data;
952       if (a->inode.size) {
953         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",a->inode.node_count,a->inode.limit);CHKERRQ(ierr);
954       } else {
955         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
956       }
957       PetscFunctionReturn(0);
958     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
959       PetscFunctionReturn(0);
960     }
961   } else if (isbinary) {
962     if (size == 1) {
963       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
964       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
965     } else {
966       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
967     }
968     PetscFunctionReturn(0);
969   } else if (isdraw) {
970     PetscDraw  draw;
971     PetscTruth isnull;
972     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
973     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
974   }
975 
976   if (size == 1) {
977     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
978     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
979   } else {
980     /* assemble the entire matrix onto first processor. */
981     Mat         A;
982     Mat_SeqAIJ  *Aloc;
983     PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
984     PetscScalar *a;
985 
986     if (!rank) {
987       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
988     } else {
989       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
990     }
991     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
992     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
993     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
994     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
995 
996     /* copy over the A part */
997     Aloc = (Mat_SeqAIJ*)aij->A->data;
998     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
999     row = aij->rstart;
1000     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
1001     for (i=0; i<m; i++) {
1002       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1003       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1004     }
1005     aj = Aloc->j;
1006     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
1007 
1008     /* copy over the B part */
1009     Aloc = (Mat_SeqAIJ*)aij->B->data;
1010     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1011     row  = aij->rstart;
1012     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1013     ct   = cols;
1014     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1015     for (i=0; i<m; i++) {
1016       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1017       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1018     }
1019     ierr = PetscFree(ct);CHKERRQ(ierr);
1020     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1021     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1022     /*
1023        Everyone has to call to draw the matrix since the graphics waits are
1024        synchronized across all processors that share the PetscDraw object
1025     */
1026     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1027     if (!rank) {
1028       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
1029       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1030     }
1031     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1032     ierr = MatDestroy(A);CHKERRQ(ierr);
1033   }
1034   PetscFunctionReturn(0);
1035 }
1036 
1037 #undef __FUNCT__
1038 #define __FUNCT__ "MatView_MPIAIJ"
1039 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1040 {
1041   PetscErrorCode ierr;
1042   PetscTruth     iascii,isdraw,issocket,isbinary;
1043 
1044   PetscFunctionBegin;
1045   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1046   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1047   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1048   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1049   if (iascii || isdraw || isbinary || issocket) {
1050     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1051   } else {
1052     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1053   }
1054   PetscFunctionReturn(0);
1055 }
1056 
1057 
1058 
1059 #undef __FUNCT__
1060 #define __FUNCT__ "MatRelax_MPIAIJ"
1061 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1062 {
1063   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1064   PetscErrorCode ierr;
1065   Vec            bb1;
1066   PetscScalar    mone=-1.0;
1067 
1068   PetscFunctionBegin;
1069   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1070 
1071   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1072 
1073   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1074     if (flag & SOR_ZERO_INITIAL_GUESS) {
1075       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1076       its--;
1077     }
1078 
1079     while (its--) {
1080       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1081       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1082 
1083       /* update rhs: bb1 = bb - B*x */
1084       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1085       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1086 
1087       /* local sweep */
1088       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1089       CHKERRQ(ierr);
1090     }
1091   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1092     if (flag & SOR_ZERO_INITIAL_GUESS) {
1093       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1094       its--;
1095     }
1096     while (its--) {
1097       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1098       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1099 
1100       /* update rhs: bb1 = bb - B*x */
1101       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1102       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1103 
1104       /* local sweep */
1105       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1106       CHKERRQ(ierr);
1107     }
1108   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1109     if (flag & SOR_ZERO_INITIAL_GUESS) {
1110       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1111       its--;
1112     }
1113     while (its--) {
1114       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1115       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1116 
1117       /* update rhs: bb1 = bb - B*x */
1118       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1119       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1120 
1121       /* local sweep */
1122       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1123       CHKERRQ(ierr);
1124     }
1125   } else {
1126     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1127   }
1128 
1129   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1130   PetscFunctionReturn(0);
1131 }
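
/*
   Each pass of the loops above is a block Jacobi style step: the current
   iterate is scattered into mat->lvec, the right-hand side is corrected to

      bb1 = bb - A_o * x

   (implemented as a scale of the ghost values by -1 followed by a MatMultAdd
   with the off-diagonal block), and a local SOR sweep is then applied to the
   diagonal block with bb1 as its right-hand side.  Only the SOR_LOCAL_*
   variants are supported; a true parallel SOR would need off-process values
   during the sweep itself.
*/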
1132 
1133 #undef __FUNCT__
1134 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1135 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1136 {
1137   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1138   Mat            A = mat->A,B = mat->B;
1139   PetscErrorCode ierr;
1140   PetscReal      isend[5],irecv[5];
1141 
1142   PetscFunctionBegin;
1143   info->block_size     = 1.0;
1144   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1145   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1146   isend[3] = info->memory;  isend[4] = info->mallocs;
1147   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1148   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1149   isend[3] += info->memory;  isend[4] += info->mallocs;
1150   if (flag == MAT_LOCAL) {
1151     info->nz_used      = isend[0];
1152     info->nz_allocated = isend[1];
1153     info->nz_unneeded  = isend[2];
1154     info->memory       = isend[3];
1155     info->mallocs      = isend[4];
1156   } else if (flag == MAT_GLOBAL_MAX) {
1157     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1158     info->nz_used      = irecv[0];
1159     info->nz_allocated = irecv[1];
1160     info->nz_unneeded  = irecv[2];
1161     info->memory       = irecv[3];
1162     info->mallocs      = irecv[4];
1163   } else if (flag == MAT_GLOBAL_SUM) {
1164     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1165     info->nz_used      = irecv[0];
1166     info->nz_allocated = irecv[1];
1167     info->nz_unneeded  = irecv[2];
1168     info->memory       = irecv[3];
1169     info->mallocs      = irecv[4];
1170   }
1171   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1172   info->fill_ratio_needed = 0;
1173   info->factor_mallocs    = 0;
1174   info->rows_global       = (double)matin->M;
1175   info->columns_global    = (double)matin->N;
1176   info->rows_local        = (double)matin->m;
1177   info->columns_local     = (double)matin->N;
1178 
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 #undef __FUNCT__
1183 #define __FUNCT__ "MatSetOption_MPIAIJ"
1184 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1185 {
1186   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1187   PetscErrorCode ierr;
1188 
1189   PetscFunctionBegin;
1190   switch (op) {
1191   case MAT_NO_NEW_NONZERO_LOCATIONS:
1192   case MAT_YES_NEW_NONZERO_LOCATIONS:
1193   case MAT_COLUMNS_UNSORTED:
1194   case MAT_COLUMNS_SORTED:
1195   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1196   case MAT_KEEP_ZEROED_ROWS:
1197   case MAT_NEW_NONZERO_LOCATION_ERR:
1198   case MAT_USE_INODES:
1199   case MAT_DO_NOT_USE_INODES:
1200   case MAT_IGNORE_ZERO_ENTRIES:
1201     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1202     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1203     break;
1204   case MAT_ROW_ORIENTED:
1205     a->roworiented = PETSC_TRUE;
1206     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1207     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1208     break;
1209   case MAT_ROWS_SORTED:
1210   case MAT_ROWS_UNSORTED:
1211   case MAT_YES_NEW_DIAGONALS:
1212     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1213     break;
1214   case MAT_COLUMN_ORIENTED:
1215     a->roworiented = PETSC_FALSE;
1216     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1217     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1218     break;
1219   case MAT_IGNORE_OFF_PROC_ENTRIES:
1220     a->donotstash = PETSC_TRUE;
1221     break;
1222   case MAT_NO_NEW_DIAGONALS:
1223     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1224   case MAT_SYMMETRIC:
1225   case MAT_STRUCTURALLY_SYMMETRIC:
1226   case MAT_HERMITIAN:
1227   case MAT_SYMMETRY_ETERNAL:
1228     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1229     break;
1230   case MAT_NOT_SYMMETRIC:
1231   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1232   case MAT_NOT_HERMITIAN:
1233   case MAT_NOT_SYMMETRY_ETERNAL:
1234     break;
1235   default:
1236     SETERRQ(PETSC_ERR_SUP,"unknown option");
1237   }
1238   PetscFunctionReturn(0);
1239 }
1240 
1241 #undef __FUNCT__
1242 #define __FUNCT__ "MatGetRow_MPIAIJ"
1243 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1244 {
1245   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1246   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1247   PetscErrorCode ierr;
1248   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1249   PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1250   PetscInt       *cmap,*idx_p;
1251 
1252   PetscFunctionBegin;
1253   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1254   mat->getrowactive = PETSC_TRUE;
1255 
1256   if (!mat->rowvalues && (idx || v)) {
1257     /*
1258         allocate enough space to hold information from the longest row.
1259     */
1260     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1261     PetscInt     max = 1,tmp;
1262     for (i=0; i<matin->m; i++) {
1263       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1264       if (max < tmp) { max = tmp; }
1265     }
1266     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1267     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1268   }
1269 
1270   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1271   lrow = row - rstart;
1272 
1273   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1274   if (!v)   {pvA = 0; pvB = 0;}
1275   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1276   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1277   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1278   nztot = nzA + nzB;
1279 
1280   cmap  = mat->garray;
1281   if (v  || idx) {
1282     if (nztot) {
1283       /* Sort by increasing column numbers, assuming A and B already sorted */
1284       PetscInt imark = -1;
1285       if (v) {
1286         *v = v_p = mat->rowvalues;
1287         for (i=0; i<nzB; i++) {
1288           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1289           else break;
1290         }
1291         imark = i;
1292         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1293         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1294       }
1295       if (idx) {
1296         *idx = idx_p = mat->rowindices;
1297         if (imark > -1) {
1298           for (i=0; i<imark; i++) {
1299             idx_p[i] = cmap[cworkB[i]];
1300           }
1301         } else {
1302           for (i=0; i<nzB; i++) {
1303             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1304             else break;
1305           }
1306           imark = i;
1307         }
1308         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1309         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1310       }
1311     } else {
1312       if (idx) *idx = 0;
1313       if (v)   *v   = 0;
1314     }
1315   }
1316   *nz = nztot;
1317   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1318   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 #undef __FUNCT__
1323 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1324 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1325 {
1326   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1327 
1328   PetscFunctionBegin;
1329   if (!aij->getrowactive) {
1330     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1331   }
1332   aij->getrowactive = PETSC_FALSE;
1333   PetscFunctionReturn(0);
1334 }
1335 
1336 #undef __FUNCT__
1337 #define __FUNCT__ "MatNorm_MPIAIJ"
1338 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1339 {
1340   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1341   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1342   PetscErrorCode ierr;
1343   PetscInt       i,j,cstart = aij->cstart;
1344   PetscReal      sum = 0.0;
1345   PetscScalar    *v;
1346 
1347   PetscFunctionBegin;
1348   if (aij->size == 1) {
1349     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1350   } else {
1351     if (type == NORM_FROBENIUS) {
1352       v = amat->a;
1353       for (i=0; i<amat->nz; i++) {
1354 #if defined(PETSC_USE_COMPLEX)
1355         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1356 #else
1357         sum += (*v)*(*v); v++;
1358 #endif
1359       }
1360       v = bmat->a;
1361       for (i=0; i<bmat->nz; i++) {
1362 #if defined(PETSC_USE_COMPLEX)
1363         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1364 #else
1365         sum += (*v)*(*v); v++;
1366 #endif
1367       }
1368       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1369       *norm = sqrt(*norm);
1370     } else if (type == NORM_1) { /* max column norm */
1371       PetscReal *tmp,*tmp2;
1372       PetscInt    *jj,*garray = aij->garray;
1373       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1374       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1375       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1376       *norm = 0.0;
1377       v = amat->a; jj = amat->j;
1378       for (j=0; j<amat->nz; j++) {
1379         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1380       }
1381       v = bmat->a; jj = bmat->j;
1382       for (j=0; j<bmat->nz; j++) {
1383         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1384       }
1385       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1386       for (j=0; j<mat->N; j++) {
1387         if (tmp2[j] > *norm) *norm = tmp2[j];
1388       }
1389       ierr = PetscFree(tmp);CHKERRQ(ierr);
1390       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1391     } else if (type == NORM_INFINITY) { /* max row norm */
1392       PetscReal ntemp = 0.0;
1393       for (j=0; j<aij->A->m; j++) {
1394         v = amat->a + amat->i[j];
1395         sum = 0.0;
1396         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1397           sum += PetscAbsScalar(*v); v++;
1398         }
1399         v = bmat->a + bmat->i[j];
1400         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1401           sum += PetscAbsScalar(*v); v++;
1402         }
1403         if (sum > ntemp) ntemp = sum;
1404       }
1405       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1406     } else {
1407       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1408     }
1409   }
1410   PetscFunctionReturn(0);
1411 }
1412 
1413 #undef __FUNCT__
1414 #define __FUNCT__ "MatTranspose_MPIAIJ"
1415 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1416 {
1417   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1418   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1419   PetscErrorCode ierr;
1420   PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1421   Mat            B;
1422   PetscScalar    *array;
1423 
1424   PetscFunctionBegin;
1425   if (!matout && M != N) {
1426     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1427   }
1428 
1429   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1430   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1431   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1432 
1433   /* copy over the A part */
1434   Aloc = (Mat_SeqAIJ*)a->A->data;
1435   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1436   row = a->rstart;
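  /* temporarily shift the diagonal-block column indices to global numbering for
     MatSetValues(); the shift is undone after the rows have been inserted */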
1437   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1438   for (i=0; i<m; i++) {
1439     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1440     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1441   }
1442   aj = Aloc->j;
1443   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1444 
1445   /* copy over the B part */
1446   Aloc = (Mat_SeqAIJ*)a->B->data;
1447   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1448   row  = a->rstart;
1449   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1450   ct   = cols;
1451   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1452   for (i=0; i<m; i++) {
1453     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1454     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1455   }
1456   ierr = PetscFree(ct);CHKERRQ(ierr);
1457   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1458   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1459   if (matout) {
1460     *matout = B;
1461   } else {
1462     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1463   }
1464   PetscFunctionReturn(0);
1465 }
1466 
1467 #undef __FUNCT__
1468 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1469 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1470 {
1471   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1472   Mat            a = aij->A,b = aij->B;
1473   PetscErrorCode ierr;
1474   PetscInt       s1,s2,s3;
1475 
1476   PetscFunctionBegin;
1477   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1478   if (rr) {
1479     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1480     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1481     /* Overlap communication with computation. */
1482     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1483   }
1484   if (ll) {
1485     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1486     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1487     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1488   }
1489   /* scale  the diagonal block */
1490   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1491 
1492   if (rr) {
1493     /* Do a scatter end and then right scale the off-diagonal block */
1494     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1495     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1496   }
1497 
1498   PetscFunctionReturn(0);
1499 }
1500 
1501 
1502 #undef __FUNCT__
1503 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1504 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1505 {
1506   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1507   PetscErrorCode ierr;
1508 
1509   PetscFunctionBegin;
1510   if (!a->rank) {
1511     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1512   }
1513   PetscFunctionReturn(0);
1514 }
1515 
1516 #undef __FUNCT__
1517 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1518 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1519 {
1520   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1521   PetscErrorCode ierr;
1522 
1523   PetscFunctionBegin;
1524   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1525   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1526   PetscFunctionReturn(0);
1527 }
1528 #undef __FUNCT__
1529 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1530 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1531 {
1532   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1533   PetscErrorCode ierr;
1534 
1535   PetscFunctionBegin;
1536   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1537   PetscFunctionReturn(0);
1538 }
1539 
1540 #undef __FUNCT__
1541 #define __FUNCT__ "MatEqual_MPIAIJ"
1542 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1543 {
1544   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1545   Mat            a,b,c,d;
1546   PetscTruth     flg;
1547   PetscErrorCode ierr;
1548 
1549   PetscFunctionBegin;
1550   a = matA->A; b = matA->B;
1551   c = matB->A; d = matB->B;
1552 
1553   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1554   if (flg) {
1555     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1556   }
1557   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1558   PetscFunctionReturn(0);
1559 }
1560 
1561 #undef __FUNCT__
1562 #define __FUNCT__ "MatCopy_MPIAIJ"
1563 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1564 {
1565   PetscErrorCode ierr;
1566   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1567   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1568 
1569   PetscFunctionBegin;
1570   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1571   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1572     /* Because of the column compression in the off-processor part of the matrix a->B,
1573        the number of columns in a->B and b->B may differ, so we cannot call
1574        MatCopy() directly on the two parts. If need be, a more efficient copy
1575        than MatCopy_Basic() could be provided by first uncompressing the a->B matrices
1576        and then copying the submatrices */
1577     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1578   } else {
1579     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1580     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1581   }
1582   PetscFunctionReturn(0);
1583 }
1584 
1585 #undef __FUNCT__
1586 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1587 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1588 {
1589   PetscErrorCode ierr;
1590 
1591   PetscFunctionBegin;
1592   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1593   PetscFunctionReturn(0);
1594 }
1595 
1596 #include "petscblaslapack.h"
1597 #undef __FUNCT__
1598 #define __FUNCT__ "MatAXPY_MPIAIJ"
1599 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1600 {
1601   PetscErrorCode ierr;
1602   PetscInt       i;
1603   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1604   PetscBLASInt   bnz,one=1;
1605   Mat_SeqAIJ     *x,*y;
1606 
1607   PetscFunctionBegin;
1608   if (str == SAME_NONZERO_PATTERN) {
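    /* with identical nonzero patterns Y += a*X reduces to a BLAS axpy on the raw value
       arrays of the diagonal (A) and off-diagonal (B) blocks */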
1609     x = (Mat_SeqAIJ *)xx->A->data;
1610     y = (Mat_SeqAIJ *)yy->A->data;
1611     bnz = (PetscBLASInt)x->nz;
1612     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1613     x = (Mat_SeqAIJ *)xx->B->data;
1614     y = (Mat_SeqAIJ *)yy->B->data;
1615     bnz = (PetscBLASInt)x->nz;
1616     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1617   } else if (str == SUBSET_NONZERO_PATTERN) {
1618     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1619 
1620     x = (Mat_SeqAIJ *)xx->B->data;
1621     y = (Mat_SeqAIJ *)yy->B->data;
1622     if (y->xtoy && y->XtoY != xx->B) {
1623       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1624       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1625     }
1626     if (!y->xtoy) { /* get xtoy */
1627       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1628       y->XtoY = xx->B;
1629     }
1630     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1631   } else {
1632     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1633   }
1634   PetscFunctionReturn(0);
1635 }
1636 
1637 /* -------------------------------------------------------------------*/
1638 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1639        MatGetRow_MPIAIJ,
1640        MatRestoreRow_MPIAIJ,
1641        MatMult_MPIAIJ,
1642 /* 4*/ MatMultAdd_MPIAIJ,
1643        MatMultTranspose_MPIAIJ,
1644        MatMultTransposeAdd_MPIAIJ,
1645        0,
1646        0,
1647        0,
1648 /*10*/ 0,
1649        0,
1650        0,
1651        MatRelax_MPIAIJ,
1652        MatTranspose_MPIAIJ,
1653 /*15*/ MatGetInfo_MPIAIJ,
1654        MatEqual_MPIAIJ,
1655        MatGetDiagonal_MPIAIJ,
1656        MatDiagonalScale_MPIAIJ,
1657        MatNorm_MPIAIJ,
1658 /*20*/ MatAssemblyBegin_MPIAIJ,
1659        MatAssemblyEnd_MPIAIJ,
1660        0,
1661        MatSetOption_MPIAIJ,
1662        MatZeroEntries_MPIAIJ,
1663 /*25*/ MatZeroRows_MPIAIJ,
1664        0,
1665        0,
1666        0,
1667        0,
1668 /*30*/ MatSetUpPreallocation_MPIAIJ,
1669        0,
1670        0,
1671        0,
1672        0,
1673 /*35*/ MatDuplicate_MPIAIJ,
1674        0,
1675        0,
1676        0,
1677        0,
1678 /*40*/ MatAXPY_MPIAIJ,
1679        MatGetSubMatrices_MPIAIJ,
1680        MatIncreaseOverlap_MPIAIJ,
1681        MatGetValues_MPIAIJ,
1682        MatCopy_MPIAIJ,
1683 /*45*/ MatPrintHelp_MPIAIJ,
1684        MatScale_MPIAIJ,
1685        0,
1686        0,
1687        0,
1688 /*50*/ MatSetBlockSize_MPIAIJ,
1689        0,
1690        0,
1691        0,
1692        0,
1693 /*55*/ MatFDColoringCreate_MPIAIJ,
1694        0,
1695        MatSetUnfactored_MPIAIJ,
1696        0,
1697        0,
1698 /*60*/ MatGetSubMatrix_MPIAIJ,
1699        MatDestroy_MPIAIJ,
1700        MatView_MPIAIJ,
1701        MatGetPetscMaps_Petsc,
1702        0,
1703 /*65*/ 0,
1704        0,
1705        0,
1706        0,
1707        0,
1708 /*70*/ 0,
1709        0,
1710        MatSetColoring_MPIAIJ,
1711 #if defined(PETSC_HAVE_ADIC)
1712        MatSetValuesAdic_MPIAIJ,
1713 #else
1714        0,
1715 #endif
1716        MatSetValuesAdifor_MPIAIJ,
1717 /*75*/ 0,
1718        0,
1719        0,
1720        0,
1721        0,
1722 /*80*/ 0,
1723        0,
1724        0,
1725        0,
1726 /*84*/ MatLoad_MPIAIJ,
1727        0,
1728        0,
1729        0,
1730        0,
1731        0,
1732 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1733        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1734        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1735        MatPtAP_Basic,
1736        MatPtAPSymbolic_MPIAIJ,
1737 /*95*/ MatPtAPNumeric_MPIAIJ,
1738        0,
1739        0,
1740        0,
1741        0,
1742 /*100*/0,
1743        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1744        MatPtAPNumeric_MPIAIJ_MPIAIJ,
1745 };
1746 
1747 /* ----------------------------------------------------------------------------------------*/
1748 
1749 EXTERN_C_BEGIN
1750 #undef __FUNCT__
1751 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1752 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
1753 {
1754   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1755   PetscErrorCode ierr;
1756 
1757   PetscFunctionBegin;
1758   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1759   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1760   PetscFunctionReturn(0);
1761 }
1762 EXTERN_C_END
1763 
1764 EXTERN_C_BEGIN
1765 #undef __FUNCT__
1766 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1767 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
1768 {
1769   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1770   PetscErrorCode ierr;
1771 
1772   PetscFunctionBegin;
1773   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1774   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1775   PetscFunctionReturn(0);
1776 }
1777 EXTERN_C_END
1778 
1779 #include "petscpc.h"
1780 EXTERN_C_BEGIN
1781 #undef __FUNCT__
1782 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1783 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1784 {
1785   Mat_MPIAIJ     *b;
1786   PetscErrorCode ierr;
1787   PetscInt       i;
1788 
1789   PetscFunctionBegin;
1790   B->preallocated = PETSC_TRUE;
1791   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1792   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1793   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1794   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1795   if (d_nnz) {
1796     for (i=0; i<B->m; i++) {
1797       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1798     }
1799   }
1800   if (o_nnz) {
1801     for (i=0; i<B->m; i++) {
1802       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1803     }
1804   }
1805   b = (Mat_MPIAIJ*)B->data;
1806   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1807   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1808 
1809   PetscFunctionReturn(0);
1810 }
1811 EXTERN_C_END
1812 
1813 #undef __FUNCT__
1814 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1815 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1816 {
1817   Mat            mat;
1818   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1819   PetscErrorCode ierr;
1820 
1821   PetscFunctionBegin;
1822   *newmat       = 0;
1823   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1824   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1825   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1826   a    = (Mat_MPIAIJ*)mat->data;
1827 
1828   mat->factor       = matin->factor;
1829   mat->bs           = matin->bs;
1830   mat->assembled    = PETSC_TRUE;
1831   mat->insertmode   = NOT_SET_VALUES;
1832   mat->preallocated = PETSC_TRUE;
1833 
1834   a->rstart       = oldmat->rstart;
1835   a->rend         = oldmat->rend;
1836   a->cstart       = oldmat->cstart;
1837   a->cend         = oldmat->cend;
1838   a->size         = oldmat->size;
1839   a->rank         = oldmat->rank;
1840   a->donotstash   = oldmat->donotstash;
1841   a->roworiented  = oldmat->roworiented;
1842   a->rowindices   = 0;
1843   a->rowvalues    = 0;
1844   a->getrowactive = PETSC_FALSE;
1845 
1846   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1847   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1848   if (oldmat->colmap) {
1849 #if defined (PETSC_USE_CTABLE)
1850     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1851 #else
1852     ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1853     ierr = PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1854     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1855 #endif
1856   } else a->colmap = 0;
1857   if (oldmat->garray) {
1858     PetscInt len;
1859     len  = oldmat->B->n;
1860     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1861     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
1862     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1863   } else a->garray = 0;
1864 
1865   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1866   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
1867   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1868   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
1869   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1870   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1871   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1872   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1873   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1874   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
1875   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1876   *newmat = mat;
1877   PetscFunctionReturn(0);
1878 }
1879 
1880 #include "petscsys.h"
1881 
1882 #undef __FUNCT__
1883 #define __FUNCT__ "MatLoad_MPIAIJ"
1884 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1885 {
1886   Mat            A;
1887   PetscScalar    *vals,*svals;
1888   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1889   MPI_Status     status;
1890   PetscErrorCode ierr;
1891   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1892   PetscInt       i,nz,j,rstart,rend,mmax;
1893   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1894   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1895   PetscInt       cend,cstart,n,*rowners;
1896   int            fd;
1897 
1898   PetscFunctionBegin;
1899   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1900   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
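  /* Only process 0 reads the binary file and ships pieces to the other processes.
     The layout read below is: a header of 4 PETSC_INT values (cookie, M, N, ...),
     followed by the row lengths, then all column indices, then all numerical values. */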
1901   if (!rank) {
1902     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1903     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1904     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1905   }
1906 
1907   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1908   M = header[1]; N = header[2];
1909   /* determine ownership of all rows */
1910   m    = M/size + ((M % size) > rank);
1911   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1912   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1913 
1914   /* First process needs enough room for process with most rows */
1915   if (!rank) {
1916     mmax       = rowners[1];
1917     for (i=2; i<size; i++) {
1918       mmax = PetscMax(mmax,rowners[i]);
1919     }
1920   } else mmax = m;
1921 
1922   rowners[0] = 0;
1923   for (i=2; i<=size; i++) {
1924     mmax       = PetscMax(mmax,rowners[i]);
1925     rowners[i] += rowners[i-1];
1926   }
1927   rstart = rowners[rank];
1928   rend   = rowners[rank+1];
1929 
1930   /* distribute row lengths to all processors */
1931   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
1932   if (!rank) {
1933     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
1934     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1935     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1936     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1937     for (j=0; j<m; j++) {
1938       procsnz[0] += ourlens[j];
1939     }
1940     for (i=1; i<size; i++) {
1941       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
1942       /* calculate the number of nonzeros on each processor */
1943       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
1944         procsnz[i] += rowlengths[j];
1945       }
1946       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1947     }
1948     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1949   } else {
1950     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1951   }
1952 
1953   if (!rank) {
1954     /* determine max buffer needed and allocate it */
1955     maxnz = 0;
1956     for (i=0; i<size; i++) {
1957       maxnz = PetscMax(maxnz,procsnz[i]);
1958     }
1959     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1960 
1961     /* read in my part of the matrix column indices  */
1962     nz   = procsnz[0];
1963     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1964     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1965 
1966     /* read in everyone else's part and ship it off */
1967     for (i=1; i<size; i++) {
1968       nz   = procsnz[i];
1969       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1970       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1971     }
1972     ierr = PetscFree(cols);CHKERRQ(ierr);
1973   } else {
1974     /* determine buffer space needed for message */
1975     nz = 0;
1976     for (i=0; i<m; i++) {
1977       nz += ourlens[i];
1978     }
1979     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1980 
1981     /* receive message of column indices*/
1982     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1983     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1984     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1985   }
1986 
1987   /* determine column ownership if matrix is not square */
1988   if (N != M) {
1989     n      = N/size + ((N % size) > rank);
1990     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1991     cstart = cend - n;
1992   } else {
1993     cstart = rstart;
1994     cend   = rend;
1995     n      = cend - cstart;
1996   }
1997 
1998   /* loop over local rows, determining number of off diagonal entries */
1999   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2000   jj = 0;
2001   for (i=0; i<m; i++) {
2002     for (j=0; j<ourlens[i]; j++) {
2003       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2004       jj++;
2005     }
2006   }
2007 
2008   /* create our matrix */
2009   for (i=0; i<m; i++) {
2010     ourlens[i] -= offlens[i];
2011   }
2012   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2013   ierr = MatSetType(A,type);CHKERRQ(ierr);
2014   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2015 
2016   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2017   for (i=0; i<m; i++) {
2018     ourlens[i] += offlens[i];
2019   }
2020 
2021   if (!rank) {
2022     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2023 
2024     /* read in my part of the matrix numerical values  */
2025     nz   = procsnz[0];
2026     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2027 
2028     /* insert into matrix */
2029     jj      = rstart;
2030     smycols = mycols;
2031     svals   = vals;
2032     for (i=0; i<m; i++) {
2033       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2034       smycols += ourlens[i];
2035       svals   += ourlens[i];
2036       jj++;
2037     }
2038 
2039     /* read in other processors and ship out */
2040     for (i=1; i<size; i++) {
2041       nz   = procsnz[i];
2042       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2043       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2044     }
2045     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2046   } else {
2047     /* receive numeric values */
2048     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2049 
2050     /* receive message of values*/
2051     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2052     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2053     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2054 
2055     /* insert into matrix */
2056     jj      = rstart;
2057     smycols = mycols;
2058     svals   = vals;
2059     for (i=0; i<m; i++) {
2060       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2061       smycols += ourlens[i];
2062       svals   += ourlens[i];
2063       jj++;
2064     }
2065   }
2066   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2067   ierr = PetscFree(vals);CHKERRQ(ierr);
2068   ierr = PetscFree(mycols);CHKERRQ(ierr);
2069   ierr = PetscFree(rowners);CHKERRQ(ierr);
2070 
2071   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2072   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2073   *newmat = A;
2074   PetscFunctionReturn(0);
2075 }
2076 
2077 #undef __FUNCT__
2078 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2079 /*
2080     Not great since it makes two copies of the submatrix: first a SeqAIJ
2081   submatrix on each process, and then the final result by concatenating the local matrices.
2082   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2083 */
2084 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2085 {
2086   PetscErrorCode ierr;
2087   PetscMPIInt    rank,size;
2088   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2089   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2090   Mat            *local,M,Mreuse;
2091   PetscScalar    *vwork,*aa;
2092   MPI_Comm       comm = mat->comm;
2093   Mat_SeqAIJ     *aij;
2094 
2095 
2096   PetscFunctionBegin;
2097   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2098   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2099 
2100   if (call ==  MAT_REUSE_MATRIX) {
2101     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2102     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2103     local = &Mreuse;
2104     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2105   } else {
2106     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2107     Mreuse = *local;
2108     ierr   = PetscFree(local);CHKERRQ(ierr);
2109   }
2110 
2111   /*
2112       m - number of local rows
2113       n - number of columns (same on all processors)
2114       rstart - first row in new global matrix generated
2115   */
2116   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2117   if (call == MAT_INITIAL_MATRIX) {
2118     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2119     ii  = aij->i;
2120     jj  = aij->j;
2121 
2122     /*
2123         Determine the number of non-zeros in the diagonal and off-diagonal
2124         portions of the matrix in order to do correct preallocation
2125     */
2126 
2127     /* first get start and end of "diagonal" columns */
2128     if (csize == PETSC_DECIDE) {
2129       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2130       if (mglobal == n) { /* square matrix */
2131         nlocal = m;
2132       } else {
2133         nlocal = n/size + ((n % size) > rank);
2134       }
2135     } else {
2136       nlocal = csize;
2137     }
2138     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2139     rstart = rend - nlocal;
2140     if (rank == size - 1 && rend != n) {
2141       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2142     }
2143 
2144     /* next, compute all the lengths */
2145     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2146     olens = dlens + m;
2147     for (i=0; i<m; i++) {
2148       jend = ii[i+1] - ii[i];
2149       olen = 0;
2150       dlen = 0;
2151       for (j=0; j<jend; j++) {
2152         if (*jj < rstart || *jj >= rend) olen++;
2153         else dlen++;
2154         jj++;
2155       }
2156       olens[i] = olen;
2157       dlens[i] = dlen;
2158     }
2159     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2160     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2161     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2162     ierr = PetscFree(dlens);CHKERRQ(ierr);
2163   } else {
2164     PetscInt ml,nl;
2165 
2166     M = *newmat;
2167     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2168     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2169     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2170     /*
2171          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2172        rather than the slower MatSetValues().
2173     */
2174     M->was_assembled = PETSC_TRUE;
2175     M->assembled     = PETSC_FALSE;
2176   }
2177   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2178   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2179   ii  = aij->i;
2180   jj  = aij->j;
2181   aa  = aij->a;
2182   for (i=0; i<m; i++) {
2183     row   = rstart + i;
2184     nz    = ii[i+1] - ii[i];
2185     cwork = jj;     jj += nz;
2186     vwork = aa;     aa += nz;
2187     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2188   }
2189 
2190   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2191   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2192   *newmat = M;
2193 
2194   /* save submatrix used in processor for next request */
2195   if (call ==  MAT_INITIAL_MATRIX) {
2196     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2197     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2198   }
2199 
2200   PetscFunctionReturn(0);
2201 }
2202 
2203 EXTERN_C_BEGIN
2204 #undef __FUNCT__
2205 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2206 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2207 {
2208   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2209   PetscInt       m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2210   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2211   const PetscInt *JJ;
2212   PetscScalar    *values;
2213   PetscErrorCode ierr;
2214 
2215   PetscFunctionBegin;
2216 #if defined(PETSC_OPT_g)
2217   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]);
2218 #endif
2219   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2220   o_nnz = d_nnz + m;
2221 
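  /* For each local row, count the entries that fall in the diagonal block
     (cstart <= column < cend) and those that fall outside it; the column indices
     are assumed to be sorted within each row, as required by this routine. */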
2222   for (i=0; i<m; i++) {
2223     nnz     = I[i+1]- I[i];
2224     JJ      = J + I[i];
2225     nnz_max = PetscMax(nnz_max,nnz);
2226 #if defined(PETSC_OPT_g)
2227     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz);
2228 #endif
2229     for (j=0; j<nnz; j++) {
2230       if (*JJ >= cstart) break;
2231       JJ++;
2232     }
2233     d = 0;
2234     for (; j<nnz; j++) {
2235       if (*JJ++ >= cend) break;
2236       d++;
2237     }
2238     d_nnz[i] = d;
2239     o_nnz[i] = nnz - d;
2240   }
2241   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2242   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2243 
2244   if (v) values = (PetscScalar*)v;
2245   else {
2246     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2247     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2248   }
2249 
2250   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2251   for (i=0; i<m; i++) {
2252     ii   = i + rstart;
2253     nnz  = I[i+1]- I[i];
2254     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr);
2255   }
2256   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2257   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2258   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2259 
2260   if (!v) {
2261     ierr = PetscFree(values);CHKERRQ(ierr);
2262   }
2263   PetscFunctionReturn(0);
2264 }
2265 EXTERN_C_END
2266 
2267 #undef __FUNCT__
2268 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2269 /*@C
2270    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2271    (the default parallel PETSc format).
2272 
2273    Collective on MPI_Comm
2274 
2275    Input Parameters:
2276 +  A - the matrix
2277 .  i - the indices into j for the start of each local row (starts with zero)
2278 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2279 -  v - optional values in the matrix
2280 
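   Example usage (a minimal sketch; the two-row CSR data below is purely illustrative):

.vb
   PetscInt    i[] = {0,2,4};            /* start of each local row in j[] and v[]     */
   PetscInt    j[] = {0,5,1,6};          /* global column indices, sorted within a row */
   PetscScalar v[] = {1.0,2.0,3.0,4.0};  /* values; may be PETSC_NULL                  */
   ierr = MatMPIAIJSetPreallocationCSR(A,i,j,v);CHKERRQ(ierr);
.ve
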
2281    Level: developer
2282 
2283 .keywords: matrix, aij, compressed row, sparse, parallel
2284 
2285 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2286 @*/
2287 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2288 {
2289   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2290 
2291   PetscFunctionBegin;
2292   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2293   if (f) {
2294     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2295   }
2296   PetscFunctionReturn(0);
2297 }
2298 
2299 #undef __FUNCT__
2300 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2301 /*@C
2302    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2303    (the default parallel PETSc format).  For good matrix assembly performance
2304    the user should preallocate the matrix storage by setting the parameters
2305    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2306    performance can be increased by more than a factor of 50.
2307 
2308    Collective on MPI_Comm
2309 
2310    Input Parameters:
2311 +  A - the matrix
2312 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2313            (same value is used for all local rows)
2314 .  d_nnz - array containing the number of nonzeros in the various rows of the
2315            DIAGONAL portion of the local submatrix (possibly different for each row)
2316            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2317            The size of this array is equal to the number of local rows, i.e 'm'.
2318            You must leave room for the diagonal entry even if it is zero.
2319 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2320            submatrix (same value is used for all local rows).
2321 -  o_nnz - array containing the number of nonzeros in the various rows of the
2322            OFF-DIAGONAL portion of the local submatrix (possibly different for
2323            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2324            structure. The size of this array is equal to the number
2325            of local rows, i.e 'm'.
2326 
2327    If the *_nnz parameter is given then the *_nz parameter is ignored
2328 
2329    The AIJ format (also called the Yale sparse matrix format or
2330    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2331    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2332 
2333    The parallel matrix is partitioned such that the first m0 rows belong to
2334    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2335    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2336 
2337    The DIAGONAL portion of the local submatrix of a processor can be defined
2338    as the submatrix which is obtained by extracting the part corresponding
2339    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2340    first row that belongs to the processor, and r2 is the last row belonging
2341    to this processor. This is a square mxm matrix. The remaining portion
2342    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2343 
2344    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2345 
2346    Example usage:
2347 
2348    Consider the following 8x8 matrix with 34 non-zero values, that is
2349    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2350    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2351    as follows:
2352 
2353 .vb
2354             1  2  0  |  0  3  0  |  0  4
2355     Proc0   0  5  6  |  7  0  0  |  8  0
2356             9  0 10  | 11  0  0  | 12  0
2357     -------------------------------------
2358            13  0 14  | 15 16 17  |  0  0
2359     Proc1   0 18  0  | 19 20 21  |  0  0
2360             0  0  0  | 22 23  0  | 24  0
2361     -------------------------------------
2362     Proc2  25 26 27  |  0  0 28  | 29  0
2363            30  0  0  | 31 32 33  |  0 34
2364 .ve
2365 
2366    This can be represented as a collection of submatrices as:
2367 
2368 .vb
2369       A B C
2370       D E F
2371       G H I
2372 .ve
2373 
2374    Where the submatrices A,B,C are owned by proc0, D,E,F are
2375    owned by proc1, G,H,I are owned by proc2.
2376 
2377    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2378    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2379    The 'M','N' parameters are 8,8, and have the same values on all procs.
2380 
2381    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2382    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2383    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2384    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2385    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2386    matrix, and [DF] as another SeqAIJ matrix.
2387 
2388    When d_nz, o_nz parameters are specified, d_nz storage elements are
2389    allocated for every row of the local diagonal submatrix, and o_nz
2390    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2391    One way to choose d_nz and o_nz is to use the maximum number of nonzeros in
2392    any local row of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2393    In this case, the values of d_nz,o_nz are:
2394 .vb
2395      proc0 : dnz = 2, o_nz = 2
2396      proc1 : dnz = 3, o_nz = 2
2397      proc2 : dnz = 1, o_nz = 4
2398 .ve
2399    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2400    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2401    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2402    34 values.
2403 
2404    When d_nnz, o_nnz parameters are specified, the storage is specified
2405    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2406    In the above case the values for d_nnz,o_nnz are:
2407 .vb
2408      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2409      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2410      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2411 .ve
2412    Here the space allocated is the sum of all the above values, i.e. 34, and
2413    hence the pre-allocation is perfect.
2414 
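   A minimal call sketch for proc0 of the example above (d_nz and o_nz are passed as 0
   since they are ignored when the per-row arrays are given):

.vb
   PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
   ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
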
2415    Level: intermediate
2416 
2417 .keywords: matrix, aij, compressed row, sparse, parallel
2418 
2419 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2420           MPIAIJ
2421 @*/
2422 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2423 {
2424   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2425 
2426   PetscFunctionBegin;
2427   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2428   if (f) {
2429     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2430   }
2431   PetscFunctionReturn(0);
2432 }
2433 
2434 #undef __FUNCT__
2435 #define __FUNCT__ "MatCreateMPIAIJ"
2436 /*@C
2437    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2438    (the default parallel PETSc format).  For good matrix assembly performance
2439    the user should preallocate the matrix storage by setting the parameters
2440    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2441    performance can be increased by more than a factor of 50.
2442 
2443    Collective on MPI_Comm
2444 
2445    Input Parameters:
2446 +  comm - MPI communicator
2447 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2448            This value should be the same as the local size used in creating the
2449            y vector for the matrix-vector product y = Ax.
2450 .  n - This value should be the same as the local size used in creating the
2451        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2452        calculated if N is given) For square matrices n is almost always m.
2453 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2454 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2455 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2456            (same value is used for all local rows)
2457 .  d_nnz - array containing the number of nonzeros in the various rows of the
2458            DIAGONAL portion of the local submatrix (possibly different for each row)
2459            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2460            The size of this array is equal to the number of local rows, i.e 'm'.
2461            You must leave room for the diagonal entry even if it is zero.
2462 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2463            submatrix (same value is used for all local rows).
2464 -  o_nnz - array containing the number of nonzeros in the various rows of the
2465            OFF-DIAGONAL portion of the local submatrix (possibly different for
2466            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2467            structure. The size of this array is equal to the number
2468            of local rows, i.e 'm'.
2469 
2470    Output Parameter:
2471 .  A - the matrix
2472 
2473    Notes:
2474    If the *_nnz parameter is given then the *_nz parameter is ignored
2475 
2476    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2477    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2478    storage requirements for this matrix.
2479 
2480    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
2481    processor then it must be used on all processors that share the object for
2482    that argument.
2483 
2484    The user MUST specify either the local or global matrix dimensions
2485    (possibly both).
2486 
2487    The parallel matrix is partitioned such that the first m0 rows belong to
2488    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2489    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2490 
2491    The DIAGONAL portion of the local submatrix of a processor can be defined
2492    as the submatrix which is obtained by extracting the part corresponding
2493    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2494    first row that belongs to the processor, and r2 is the last row belonging
2495    to this processor. This is a square mxm matrix. The remaining portion
2496    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2497 
2498    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2499 
2500    When calling this routine with a single process communicator, a matrix of
2501    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2502    type of communicator, use the construction mechanism:
2503      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2504 
2505    By default, this format uses inodes (identical nodes) when possible.
2506    We search for consecutive rows with the same nonzero structure, thereby
2507    reusing matrix information to achieve increased efficiency.
2508 
2509    Options Database Keys:
2510 +  -mat_aij_no_inode  - Do not use inodes
2511 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2512 -  -mat_aij_oneindex - Internally use indexing starting at 1
2513         rather than 0.  Note that when calling MatSetValues(),
2514         the user still MUST index entries starting at 0!
2515 
2516 
2517    Example usage:
2518 
2519    Consider the following 8x8 matrix with 34 non-zero values, that is
2520    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2521    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2522    as follows:
2523 
2524 .vb
2525             1  2  0  |  0  3  0  |  0  4
2526     Proc0   0  5  6  |  7  0  0  |  8  0
2527             9  0 10  | 11  0  0  | 12  0
2528     -------------------------------------
2529            13  0 14  | 15 16 17  |  0  0
2530     Proc1   0 18  0  | 19 20 21  |  0  0
2531             0  0  0  | 22 23  0  | 24  0
2532     -------------------------------------
2533     Proc2  25 26 27  |  0  0 28  | 29  0
2534            30  0  0  | 31 32 33  |  0 34
2535 .ve
2536 
2537    This can be represented as a collection of submatrices as:
2538 
2539 .vb
2540       A B C
2541       D E F
2542       G H I
2543 .ve
2544 
2545    Where the submatrices A,B,C are owned by proc0, D,E,F are
2546    owned by proc1, G,H,I are owned by proc2.
2547 
2548    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2549    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2550    The 'M','N' parameters are 8,8, and have the same values on all procs.
2551 
2552    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2553    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2554    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2555    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2556    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2557    matrix, and [DF] as another SeqAIJ matrix.
2558 
2559    When d_nz, o_nz parameters are specified, d_nz storage elements are
2560    allocated for every row of the local diagonal submatrix, and o_nz
2561    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2562    One way to choose d_nz and o_nz is to use the maximum number of nonzeros in
2563    any local row of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2564    In this case, the values of d_nz,o_nz are:
2565 .vb
2566      proc0 : dnz = 2, o_nz = 2
2567      proc1 : dnz = 3, o_nz = 2
2568      proc2 : dnz = 1, o_nz = 4
2569 .ve
2570    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2571    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2572    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2573    34 values.
2574 
2575    When d_nnz, o_nnz parameters are specified, the storage is specified
2576    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2577    In the above case the values for d_nnz,o_nnz are:
2578 .vb
2579      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2580      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2581      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2582 .ve
2583    Here the space allocated is the sum of all the above values, i.e. 34, and
2584    hence the pre-allocation is perfect.
2585 
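   A minimal call sketch for proc0 of the example above (m=3, n=3, M=8, N=8; d_nz and
   o_nz are passed as 0 since the per-row arrays are given):

.vb
   PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
   ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
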
2586    Level: intermediate
2587 
2588 .keywords: matrix, aij, compressed row, sparse, parallel
2589 
2590 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2591           MPIAIJ
2592 @*/
2593 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2594 {
2595   PetscErrorCode ierr;
2596   PetscMPIInt    size;
2597 
2598   PetscFunctionBegin;
2599   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2600   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2601   if (size > 1) {
2602     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2603     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2604   } else {
2605     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2606     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2607   }
2608   PetscFunctionReturn(0);
2609 }
2610 
2611 #undef __FUNCT__
2612 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2613 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2614 {
2615   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2616 
2617   PetscFunctionBegin;
2618   *Ad     = a->A;
2619   *Ao     = a->B;
2620   *colmap = a->garray;
2621   PetscFunctionReturn(0);
2622 }
2623 
2624 #undef __FUNCT__
2625 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2626 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2627 {
2628   PetscErrorCode ierr;
2629   PetscInt       i;
2630   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2631 
2632   PetscFunctionBegin;
2633   if (coloring->ctype == IS_COLORING_LOCAL) {
2634     ISColoringValue *allcolors,*colors;
2635     ISColoring      ocoloring;
2636 
2637     /* set coloring for diagonal portion */
2638     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2639 
2640     /* set coloring for off-diagonal portion */
2641     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2642     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2643     for (i=0; i<a->B->n; i++) {
2644       colors[i] = allcolors[a->garray[i]];
2645     }
2646     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2647     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2648     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2649     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2650   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2651     ISColoringValue *colors;
2652     PetscInt        *larray;
2653     ISColoring      ocoloring;
2654 
2655     /* set coloring for diagonal portion */
2656     ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2657     for (i=0; i<a->A->n; i++) {
2658       larray[i] = i + a->cstart;
2659     }
2660     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2661     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2662     for (i=0; i<a->A->n; i++) {
2663       colors[i] = coloring->colors[larray[i]];
2664     }
2665     ierr = PetscFree(larray);CHKERRQ(ierr);
2666     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2667     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2668     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2669 
2670     /* set coloring for off-diagonal portion */
2671     ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2672     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2673     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2674     for (i=0; i<a->B->n; i++) {
2675       colors[i] = coloring->colors[larray[i]];
2676     }
2677     ierr = PetscFree(larray);CHKERRQ(ierr);
2678     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2679     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2680     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2681   } else {
2682     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2683   }
2684 
2685   PetscFunctionReturn(0);
2686 }
2687 
2688 #if defined(PETSC_HAVE_ADIC)
2689 #undef __FUNCT__
2690 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2691 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2692 {
2693   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2694   PetscErrorCode ierr;
2695 
2696   PetscFunctionBegin;
2697   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2698   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2699   PetscFunctionReturn(0);
2700 }
2701 #endif
2702 
2703 #undef __FUNCT__
2704 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2705 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2706 {
2707   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2708   PetscErrorCode ierr;
2709 
2710   PetscFunctionBegin;
2711   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2712   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2713   PetscFunctionReturn(0);
2714 }
2715 
2716 #undef __FUNCT__
2717 #define __FUNCT__ "MatMerge"
2718 /*@C
2719       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2720                  matrices from each processor
2721 
2722     Collective on MPI_Comm
2723 
2724    Input Parameters:
2725 +    comm - the communicator the parallel matrix will live on
2726 .    inmat - the input sequential matrix (one per process)
2727 .    n - number of local columns (or PETSC_DECIDE)
2728 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2729 
2730    Output Parameter:
2731 .    outmat - the parallel matrix generated
2732 
2733     Level: advanced
2734 
2735    Notes: The number of columns of the matrix in EACH processor MUST be the same.
2736 
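   A minimal usage sketch (Aseq is assumed to be an assembled sequential matrix with the
   same number of columns on every process; note that the input matrix is destroyed by
   this routine):

.vb
   Mat Aseq,Apar;
   /* ... assemble Aseq on each process ... */
   ierr = MatMerge(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Apar);CHKERRQ(ierr);
.ve
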
2737 @*/
2738 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2739 {
2740   PetscErrorCode ierr;
2741   PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
2742   PetscInt       *indx;
2743   PetscScalar    *values;
2744   PetscMap       columnmap,rowmap;
2745 
2746   PetscFunctionBegin;
2747   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2748   /*
2749   PetscMPIInt       rank;
2750   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2751   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N);
2752   */
2753   if (scall == MAT_INITIAL_MATRIX){
2754     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2755     if (n == PETSC_DECIDE){
2756       ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2757       ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
2758       ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2759       ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2760       ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2761     }
2762 
2763     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2764     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2765     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2766     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2767     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2768 
2769     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2770     for (i=0;i<m;i++) {
2771       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2772       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2773       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2774     }
2775     /* This routine will ONLY return MPIAIJ type matrix */
2776     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2777     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2778     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2779     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2780 
2781   } else if (scall == MAT_REUSE_MATRIX){
2782     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2783   } else {
2784     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2785   }
2786 
2787   for (i=0;i<m;i++) {
2788     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2789     I    = i + rstart;
2790     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2791     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2792   }
2793   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2794   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2795   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2796 
2797   PetscFunctionReturn(0);
2798 }
2799 
2800 #undef __FUNCT__
2801 #define __FUNCT__ "MatFileSplit"
2802 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2803 {
2804   PetscErrorCode    ierr;
2805   PetscMPIInt       rank;
2806   PetscInt          m,N,i,rstart,nnz;
2807   size_t            len;
2808   const PetscInt    *indx;
2809   PetscViewer       out;
2810   char              *name;
2811   Mat               B;
2812   const PetscScalar *values;
2813 
2814   PetscFunctionBegin;
2815   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2816   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2817   /* Should this be the type of the diagonal block of A? */
2818   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2819   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2820   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2821   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2822   for (i=0;i<m;i++) {
2823     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2824     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2825     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2826   }
2827   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2828   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2829 
2830   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2831   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2832   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2833   sprintf(name,"%s.%d",outfile,rank);
2834   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2835   ierr = PetscFree(name);CHKERRQ(ierr);
2836   ierr = MatView(B,out);CHKERRQ(ierr);
2837   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2838   ierr = MatDestroy(B);CHKERRQ(ierr);
2839   PetscFunctionReturn(0);
2840 }
2841 
2842 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2843 #undef __FUNCT__
2844 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2845 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2846 {
2847   PetscErrorCode       ierr;
2848   Mat_Merge_SeqsToMPI  *merge;
2849   PetscObjectContainer container;
2850 
2851   PetscFunctionBegin;
2852   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2853   if (container) {
2854     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2855     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2856     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2857     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2858     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2859     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2860     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2861     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2862     ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2863     if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);}
2864     if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);}
2865     if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);}
2866 
2867     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2868     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2869     ierr = PetscFree(merge);CHKERRQ(ierr);
2870   }
2871 
2872   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2873   PetscFunctionReturn(0);
2874 }
2875 
2876 #include "src/mat/utils/freespace.h"
2877 #include "petscbt.h"
2878 #undef __FUNCT__
2879 #define __FUNCT__ "MatMerge_SeqsToMPI"
2880 /*@C
2881       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
2882                  matrices from each processor
2883 
2884     Collective on MPI_Comm
2885 
2886    Input Parameters:
2887 +    comm - the communicator the parallel matrix will live on
2888 .    seqmat - the input sequential matrix (one per processor)
2889 .    m - number of local rows (or PETSC_DECIDE)
2890 .    n - number of local columns (or PETSC_DECIDE)
2891 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2892 
2893    Output Parameter:
2894 .    mpimat - the parallel matrix generated
2895 
2896     Level: advanced
2897 
2898    Notes:
2899      The dimensions of the sequential matrix on each processor MUST be the same.
2900      The data needed to repeat the merge is kept in a container composed with mpimat under the
2901      key "MatMergeSeqsToMPI"; it is freed when mpimat is destroyed and can be accessed with PetscObjectQuery().
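   Example usage (a minimal sketch; 'seqA' is a hypothetical full-sized SeqAIJ matrix holding
   each process's contribution, to be summed across the communicator):
.vb
      Mat seqA,C;
      /* ... build and assemble seqA on every process ... */
      ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
      /* with the same nonzero pattern in seqA, the merge can be repeated without the symbolic phase */
      ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
      ierr = MatDestroy(C);CHKERRQ(ierr);
.ve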
2902 @*/
2903 static PetscEvent logkey_seqstompinum = 0;
2904 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2905 {
2906   PetscErrorCode       ierr;
2907   MPI_Comm             comm=mpimat->comm;
2908   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2909   PetscMPIInt          size,rank,taga,*len_s;
2910   PetscInt             N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
2911   PetscInt             proc,m;
2912   PetscInt             **buf_ri,**buf_rj;
2913   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2914   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2915   MPI_Request          *s_waits,*r_waits;
2916   MPI_Status           *status;
2917   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2918   Mat_Merge_SeqsToMPI  *merge;
2919   PetscObjectContainer container;
2920 
2921   PetscFunctionBegin;
2922   if (!logkey_seqstompinum) {
2923     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);
2924   }
2925   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2926 
2927   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2928   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2929 
2930   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2931   if (container) {
2932     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2933   } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat was not created by MatMerge_SeqsToMPISymbolic()");
2934   bi     = merge->bi;
2935   bj     = merge->bj;
2936   buf_ri = merge->buf_ri;
2937   buf_rj = merge->buf_rj;
2938 
2939   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2940   ierr   = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2941   len_s  = merge->len_s;
2942 
2943   /* send and recv matrix values */
2944   /*-----------------------------*/
2945   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
2946   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
2947 
2948   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2949   for (proc=0,k=0; proc<size; proc++){
2950     if (!len_s[proc]) continue;
2951     i = owners[proc];
2952     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
2953     k++;
2954   }
2955 
2956   ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
2957   ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
2958   ierr = PetscFree(status);CHKERRQ(ierr);
2959 
2960   ierr = PetscFree(s_waits);CHKERRQ(ierr);
2961   ierr = PetscFree(r_waits);CHKERRQ(ierr);
2962 
2963   /* insert mat values of mpimat */
2964   /*----------------------------*/
2965   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
2966   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
2967   nextrow = buf_ri_k + merge->nrecv;
2968   nextai  = nextrow + merge->nrecv;
2969 
2970   for (k=0; k<merge->nrecv; k++){
2971     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
2972     nrows = *(buf_ri_k[k]);
2973     nextrow[k]  = buf_ri_k[k]+1;  /* next row index in the k-th received i-structure */
2974     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the offsets of the k-th received i-structure */
2975   }
2976 
2977   /* set values of ba */
2978   ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
2979   for (i=0; i<m; i++) {
2980     arow = owners[rank] + i;
2981     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
2982     bnzi = bi[i+1] - bi[i];
2983     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
2984 
2985     /* add local non-zero vals of this proc's seqmat into ba */
2986     anzi = ai[arow+1] - ai[arow];
2987     aj   = a->j + ai[arow];
2988     aa   = a->a + ai[arow];
2989     nextaj = 0;
2990     for (j=0; nextaj<anzi; j++){
2991       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2992         ba_i[j] += aa[nextaj++];
2993       }
2994     }
2995 
2996     /* add received vals into ba */
2997     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
2998       /* i-th row */
2999       if (i == *nextrow[k]) {
3000         anzi = *(nextai[k]+1) - *nextai[k];
3001         aj   = buf_rj[k] + *(nextai[k]);
3002         aa   = abuf_r[k] + *(nextai[k]);
3003         nextaj = 0;
3004         for (j=0; nextaj<anzi; j++){
3005           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3006             ba_i[j] += aa[nextaj++];
3007           }
3008         }
3009         nextrow[k]++; nextai[k]++;
3010       }
3011     }
3012     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3013   }
3014   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3015   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3016 
3017   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3018   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3019   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3020   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3021   PetscFunctionReturn(0);
3022 }
3023 static PetscEvent logkey_seqstompisym = 0;
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3024 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3025 {
3026   PetscErrorCode       ierr;
3027   Mat                  B_mpi;
3028   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3029   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3030   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3031   PetscInt             M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
3032   PetscInt             len,proc,*dnz,*onz;
3033   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3034   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3035   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3036   MPI_Status           *status;
3037   FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
3038   PetscBT              lnkbt;
3039   Mat_Merge_SeqsToMPI  *merge;
3040   PetscObjectContainer container;
3041 
3042   PetscFunctionBegin;
3043   if (!logkey_seqstompisym) {
3044     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);
3045   }
3046   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3047 
3048   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3049   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3050 
3051   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3052   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3053 
3054   /* determine row ownership */
3055   /*---------------------------------------------------------*/
3056   ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
3057   if (m == PETSC_DECIDE) {
3058     ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
3059   } else {
3060     ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
3061   }
3062   ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
3063   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3064   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3065 
3066   if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); }
3067   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
3068 
3069   /* determine the number of messages to send, their lengths */
3070   /*---------------------------------------------------------*/
3071   len_s  = merge->len_s;
3072 
3073   len = 0;  /* length of buf_si[] */
3074   merge->nsend = 0;
3075   for (proc=0; proc<size; proc++){
3076     len_si[proc] = 0;
3077     if (proc == rank){
3078       len_s[proc] = 0;
3079     } else {
3080       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3081       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3082     }
3083     if (len_s[proc]) {
3084       merge->nsend++;
3085       nrows = 0;
3086       for (i=owners[proc]; i<owners[proc+1]; i++){
3087         if (ai[i+1] > ai[i]) nrows++;
3088       }
3089       len_si[proc] = 2*(nrows+1);
3090       len += len_si[proc];
3091     }
3092   }
3093 
3094   /* determine the number and length of messages to receive for ij-structure */
3095   /*-------------------------------------------------------------------------*/
3096   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3097   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3098 
3099   /* post the Irecv of j-structure */
3100   /*-------------------------------*/
3101   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
3102   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3103 
3104   /* post the Isend of j-structure */
3105   /*--------------------------------*/
3106   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3107   sj_waits = si_waits + merge->nsend;
3108 
3109   for (proc=0, k=0; proc<size; proc++){
3110     if (!len_s[proc]) continue;
3111     i = owners[proc];
3112     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3113     k++;
3114   }
3115 
3116   /* receives and sends of j-structure are complete */
3117   /*------------------------------------------------*/
3118   ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
3119   ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);
3120 
3121   /* send and recv i-structure */
3122   /*---------------------------*/
3123   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
3124   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3125 
3126   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3127   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3128   for (proc=0,k=0; proc<size; proc++){
3129     if (!len_s[proc]) continue;
3130     /* form outgoing message for i-structure:
3131          buf_si[0]:                 nrows to be sent
3132                [1:nrows]:           row index (local to the destination processor)
3133                [nrows+1:2*nrows+1]: i-structure index
3134     */
3135     /*-------------------------------------------*/
3136     nrows = len_si[proc]/2 - 1;
3137     buf_si_i    = buf_si + nrows+1;
3138     buf_si[0]   = nrows;
3139     buf_si_i[0] = 0;
3140     nrows = 0;
3141     for (i=owners[proc]; i<owners[proc+1]; i++){
3142       anzi = ai[i+1] - ai[i];
3143       if (anzi) {
3144         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3145         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3146         nrows++;
3147       }
3148     }
3149     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3150     k++;
3151     buf_si += len_si[proc];
3152   }
3153 
3154   ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
3155   ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);
3156 
3157   ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3158   for (i=0; i<merge->nrecv; i++){
3159     ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI:   recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3160   }
3161 
3162   ierr = PetscFree(len_si);CHKERRQ(ierr);
3163   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3164   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3165   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3166   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3167   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3168   ierr = PetscFree(status);CHKERRQ(ierr);
3169 
3170   /* compute a local seq matrix in each processor */
3171   /*----------------------------------------------*/
3172   /* allocate bi array and free space for accumulating nonzero column info */
3173   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3174   bi[0] = 0;
3175 
3176   /* create and initialize a linked list */
3177   nlnk = N+1;
3178   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3179 
3180   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3181   len = 0;
3182   len  = ai[owners[rank+1]] - ai[owners[rank]];
3183   ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3184   current_space = free_space;
3185 
3186   /* determine symbolic info for each local row */
3187   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3188   nextrow = buf_ri_k + merge->nrecv;
3189   nextai  = nextrow + merge->nrecv;
3190   for (k=0; k<merge->nrecv; k++){
3191     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
3192     nrows = *buf_ri_k[k];
3193     nextrow[k]  = buf_ri_k[k] + 1;  /* next row index in the k-th received i-structure */
3194     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the offsets of the k-th received i-structure */
3195   }
3196 
3197   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3198   len = 0;
3199   for (i=0;i<m;i++) {
3200     bnzi   = 0;
3201     /* add local non-zero cols of this proc's seqmat into lnk */
3202     arow   = owners[rank] + i;
3203     anzi   = ai[arow+1] - ai[arow];
3204     aj     = a->j + ai[arow];
3205     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3206     bnzi += nlnk;
3207     /* add received col data into lnk */
3208     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3209       if (i == *nextrow[k]) { /* i-th row */
3210         anzi = *(nextai[k]+1) - *nextai[k];
3211         aj   = buf_rj[k] + *nextai[k];
3212         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3213         bnzi += nlnk;
3214         nextrow[k]++; nextai[k]++;
3215       }
3216     }
3217     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3218 
3219     /* if free space is not available, make more free space */
3220     if (current_space->local_remaining<bnzi) {
3221       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3222       nspacedouble++;
3223     }
3224     /* copy data into free space, then initialize lnk */
3225     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3226     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3227 
3228     current_space->array           += bnzi;
3229     current_space->local_used      += bnzi;
3230     current_space->local_remaining -= bnzi;
3231 
3232     bi[i+1] = bi[i] + bnzi;
3233   }
3234 
3235   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3236 
3237   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3238   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3239   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3240 
3241   /* create symbolic parallel matrix B_mpi */
3242   /*---------------------------------------*/
3243   if (n==PETSC_DECIDE) {
3244     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr);
3245   } else {
3246     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
3247   }
3248   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3249   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3250   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3251 
3252   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3253   B_mpi->assembled     = PETSC_FALSE;
3254   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3255   merge->bi            = bi;
3256   merge->bj            = bj;
3257   merge->buf_ri        = buf_ri;
3258   merge->buf_rj        = buf_rj;
3259   merge->coi           = PETSC_NULL;
3260   merge->coj           = PETSC_NULL;
3261   merge->owners_co     = PETSC_NULL;
3262 
3263   /* attach the supporting struct to B_mpi for reuse */
3264   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3265   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3266   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3267   *mpimat = B_mpi;
3268   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3269   PetscFunctionReturn(0);
3270 }
3271 
3272 static PetscEvent logkey_seqstompi = 0;
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
3273 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3274 {
3275   PetscErrorCode   ierr;
3276 
3277   PetscFunctionBegin;
3278   if (!logkey_seqstompi) {
3279     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);
3280   }
3281   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3282   if (scall == MAT_INITIAL_MATRIX){
3283     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3284   }
3285   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3286   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3287   PetscFunctionReturn(0);
3288 }
3289 static PetscEvent logkey_getlocalmat = 0;
3290 #undef __FUNCT__
3291 #define __FUNCT__ "MatGetLocalMat"
3292 /*@C
3293      MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of the parallel matrix A
3294 
3295     Not Collective
3296 
3297    Input Parameters:
3298 +    A - the matrix
3299 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3300 
3301    Output Parameter:
3302 .    A_loc - the local sequential matrix generated
3303 
3304     Level: developer
3305 
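   Example usage (a minimal sketch; A is an assembled MPIAIJ matrix):
.vb
      Mat A_loc;
      ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      /* if the values of A change but its nonzero pattern does not, refresh the copy */
      ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
.ve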
3306 @*/
3307 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3308 {
3309   PetscErrorCode  ierr;
3310   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3311   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3312   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3313   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3314   PetscInt        am=A->m,i,j,k,cstart=mpimat->cstart;
3315   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3316 
3317   PetscFunctionBegin;
3318   if (!logkey_getlocalmat) {
3319     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);
3320   }
3321   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3322   if (scall == MAT_INITIAL_MATRIX){
3323     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3324     ci[0] = 0;
3325     for (i=0; i<am; i++){
3326       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3327     }
3328     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3329     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3330     k = 0;
3331     for (i=0; i<am; i++) {
3332       ncols_o = bi[i+1] - bi[i];
3333       ncols_d = ai[i+1] - ai[i];
3334       /* off-diagonal portion of A */
3335       for (jo=0; jo<ncols_o; jo++) {
3336         col = cmap[*bj];
3337         if (col >= cstart) break;
3338         cj[k]   = col; bj++;
3339         ca[k++] = *ba++;
3340       }
3341       /* diagonal portion of A */
3342       for (j=0; j<ncols_d; j++) {
3343         cj[k]   = cstart + *aj++;
3344         ca[k++] = *aa++;
3345       }
3346       /* off-diagonal portion of A */
3347       for (j=jo; j<ncols_o; j++) {
3348         cj[k]   = cmap[*bj++];
3349         ca[k++] = *ba++;
3350       }
3351     }
3352     /* put together the new matrix */
3353     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3354     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3355     /* Since these are PETSc arrays, change flags to free them as necessary. */
3356     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3357     mat->freedata = PETSC_TRUE;
3358     mat->nonew    = 0;
3359   } else if (scall == MAT_REUSE_MATRIX){
3360     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3361     ci = mat->i; cj = mat->j; ca = mat->a;
3362     for (i=0; i<am; i++) {
3363       /* off-diagonal portion of A */
3364       ncols_o = bi[i+1] - bi[i];
3365       for (jo=0; jo<ncols_o; jo++) {
3366         col = cmap[*bj];
3367         if (col >= cstart) break;
3368         *ca++ = *ba++; bj++;
3369       }
3370       /* diagonal portion of A */
3371       ncols_d = ai[i+1] - ai[i];
3372       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3373       /* off-diagonal portion of A */
3374       for (j=jo; j<ncols_o; j++) {
3375         *ca++ = *ba++; bj++;
3376       }
3377     }
3378   } else {
3379     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3380   }
3381 
3382   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3383   PetscFunctionReturn(0);
3384 }
3385 
3386 static PetscEvent logkey_getlocalmatcondensed = 0;
3387 #undef __FUNCT__
3388 #define __FUNCT__ "MatGetLocalMatCondensed"
3389 /*@C
3390      MatGetLocalMatCondensed - Creates a SeqAIJ matrix containing all the local rows and only the NON-ZERO columns of the parallel matrix A
3391 
3392     Not Collective
3393 
3394    Input Parameters:
3395 +    A - the matrix
3396 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3397 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3398 
3399    Output Parameter:
3400 .    A_loc - the local sequential matrix generated
3401 
3402     Level: developer
3403 
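   Example usage (a minimal sketch; passing PETSC_NULL for row and col lets the routine select
   all local rows and the nonzero columns itself):
.vb
      Mat A_loc;
      ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
      /* ... use A_loc ... */
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
.ve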
3404 @*/
3405 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3406 {
3407   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3408   PetscErrorCode    ierr;
3409   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3410   IS                isrowa,iscola;
3411   Mat               *aloc;
3412 
3413   PetscFunctionBegin;
3414   if (!logkey_getlocalmatcondensed) {
3415     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);
3416   }
3417   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3418   if (!row){
3419     start = a->rstart; end = a->rend;
3420     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3421   } else {
3422     isrowa = *row;
3423   }
3424   if (!col){
3425     start = a->cstart;
3426     cmap  = a->garray;
3427     nzA   = a->A->n;
3428     nzB   = a->B->n;
3429     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3430     ncols = 0;
3431     for (i=0; i<nzB; i++) {
3432       if (cmap[i] < start) idx[ncols++] = cmap[i];
3433       else break;
3434     }
3435     imark = i;
3436     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3437     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3438     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3439     ierr = PetscFree(idx);CHKERRQ(ierr);
3440   } else {
3441     iscola = *col;
3442   }
3443   if (scall != MAT_INITIAL_MATRIX){
3444     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3445     aloc[0] = *A_loc;
3446   }
3447   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3448   *A_loc = aloc[0];
3449   ierr = PetscFree(aloc);CHKERRQ(ierr);
3450   if (!row){
3451     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3452   }
3453   if (!col){
3454     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3455   }
3456   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3457   PetscFunctionReturn(0);
3458 }
3459 
3460 static PetscEvent logkey_GetBrowsOfAcols = 0;
3461 #undef __FUNCT__
3462 #define __FUNCT__ "MatGetBrowsOfAcols"
3463 /*@C
3464     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
3465 
3466     Collective on Mat
3467 
3468    Input Parameters:
3469 +    A,B - the matrices in mpiaij format
3470 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3471 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3472 
3473    Output Parameter:
3474 +    rowb, colb - index sets of rows and columns of B to extract
3475 .    brstart - index of the first row of B_seq taken from B's own local rows (the next B->m rows of B_seq)
3476 -    B_seq - the sequential matrix generated
3477 
3478     Level: developer
3479 
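   Example usage (a minimal sketch; the index sets created by the first call are passed back
   in for the MAT_REUSE_MATRIX call):
.vb
      Mat      B_seq;
      IS       rowb,colb;
      PetscInt brstart;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      /* if B's values change but its pattern does not, refresh B_seq */
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      /* the caller destroys rowb, colb and B_seq when done */
      ierr = ISDestroy(rowb);CHKERRQ(ierr);
      ierr = ISDestroy(colb);CHKERRQ(ierr);
      ierr = MatDestroy(B_seq);CHKERRQ(ierr);
.ve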
3480 @*/
3481 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3482 {
3483   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3484   PetscErrorCode    ierr;
3485   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3486   IS                isrowb,iscolb;
3487   Mat               *bseq;
3488 
3489   PetscFunctionBegin;
3490   if (a->cstart != b->rstart || a->cend != b->rend){
3491     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3492   }
3493   if (!logkey_GetBrowsOfAcols) {
3494     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);
3495   }
3496   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3497 
3498   if (scall == MAT_INITIAL_MATRIX){
3499     start = a->cstart;
3500     cmap  = a->garray;
3501     nzA   = a->A->n;
3502     nzB   = a->B->n;
3503     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3504     ncols = 0;
3505     for (i=0; i<nzB; i++) {  /* rows of B that precede B's local (diagonal-block) rows */
3506       if (cmap[i] < start) idx[ncols++] = cmap[i];
3507       else break;
3508     }
3509     imark = i;
3510     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* B's own local rows */
3511     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* rows of B that follow B's local rows */
3512     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3513     ierr = PetscFree(idx);CHKERRQ(ierr);
3514     *brstart = imark;
3515     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3516   } else {
3517     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3518     isrowb = *rowb; iscolb = *colb;
3519     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3520     bseq[0] = *B_seq;
3521   }
3522   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3523   *B_seq = bseq[0];
3524   ierr = PetscFree(bseq);CHKERRQ(ierr);
3525   if (!rowb){
3526     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3527   } else {
3528     *rowb = isrowb;
3529   }
3530   if (!colb){
3531     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3532   } else {
3533     *colb = iscolb;
3534   }
3535   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3536   PetscFunctionReturn(0);
3537 }
3538 
3539 static PetscEvent logkey_GetBrowsOfAocols = 0;
3540 #undef __FUNCT__
3541 #define __FUNCT__ "MatGetBrowsOfAoCols"
3542 /*@C
3543     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
3544     of the OFF-DIAGONAL portion of the local part of A
3545 
3546     Collective on Mat
3547 
3548    Input Parameters:
3549 +    A,B - the matrices in mpiaij format
3550 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3551 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3552 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3553 
3554    Output Parameter:
3555 .    B_oth - the sequential matrix generated
3556 
3557     Level: developer
3558 
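   Example usage (a minimal sketch; the startsj and bufa buffers returned by the first call are
   passed back in so the communication setup is reused):
.vb
      Mat         B_oth;
      PetscInt    *startsj;
      PetscScalar *bufa;
      ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
      /* if B's values change but its pattern does not, refresh B_oth */
      ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
      ierr = PetscFree(startsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
      ierr = MatDestroy(B_oth);CHKERRQ(ierr);
.ve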
3559 @*/
3560 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3561 {
3562   VecScatter_MPI_General *gen_to,*gen_from;
3563   PetscErrorCode         ierr;
3564   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3565   Mat_SeqAIJ             *b_oth;
3566   VecScatter             ctx=a->Mvctx;
3567   MPI_Comm               comm=ctx->comm;
3568   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3569   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
3570   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3571   PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3572   MPI_Request            *rwaits,*swaits;
3573   MPI_Status             *sstatus,rstatus;
3574   PetscInt               *cols;
3575   PetscScalar            *vals;
3576   PetscMPIInt            j;
3577 
3578   PetscFunctionBegin;
3579   if (a->cstart != b->rstart || a->cend != b->rend){
3580     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
3581   }
3582   if (!logkey_GetBrowsOfAocols) {
3583     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);
3584   }
3585   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3586   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3587 
3588   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3589   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3590   rvalues  = gen_from->values; /* buffer for the lengths of the rows to be received */
3591   svalues  = gen_to->values;   /* buffer for the lengths of the rows to be sent */
3592   nrecvs   = gen_from->n;
3593   nsends   = gen_to->n;
3594   rwaits   = gen_from->requests;
3595   swaits   = gen_to->requests;
3596   srow     = gen_to->indices;   /* local row index to be sent */
3597   rstarts  = gen_from->starts;
3598   sstarts  = gen_to->starts;
3599   rprocs   = gen_from->procs;
3600   sprocs   = gen_to->procs;
3601   sstatus  = gen_to->sstatus;
3602 
3603   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3604   if (scall == MAT_INITIAL_MATRIX){
3605     /* i-array */
3606     /*---------*/
3607     /*  post receives */
3608     for (i=0; i<nrecvs; i++){
3609       rowlen = (PetscInt*)rvalues + rstarts[i];
3610       nrows = rstarts[i+1]-rstarts[i];
3611       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3612     }
3613 
3614     /* pack the outgoing message */
3615     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3616     rstartsj = sstartsj + nsends +1;
3617     sstartsj[0] = 0;  rstartsj[0] = 0;
3618     len = 0; /* total length of j or a array to be sent */
3619     k = 0;
3620     for (i=0; i<nsends; i++){
3621       rowlen = (PetscInt*)svalues + sstarts[i];
3622       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3623       for (j=0; j<nrows; j++) {
3624         row = srow[k] + b->rowners[rank]; /* global row idx */
3625         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3626         len += rowlen[j];
3627         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3628         k++;
3629       }
3630       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3631       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3632     }
3633     /* recvs and sends of i-array are completed */
3634     i = nrecvs;
3635     while (i--) {
3636       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3637     }
3638     if (nsends) {
3639       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3640     }
3641     /* allocate buffers for sending j and a arrays */
3642     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3643     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3644 
3645     /* create i-array of B_oth */
3646     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3647     b_othi[0] = 0;
3648     len = 0; /* total length of j or a array to be received */
3649     k = 0;
3650     for (i=0; i<nrecvs; i++){
3651       rowlen = (PetscInt*)rvalues + rstarts[i];
3652       nrows = rstarts[i+1]-rstarts[i];
3653       for (j=0; j<nrows; j++) {
3654         b_othi[k+1] = b_othi[k] + rowlen[j];
3655         len += rowlen[j]; k++;
3656       }
3657       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3658     }
3659 
3660     /* allocate space for j and a arrays of B_oth */
3661     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3662     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3663 
3664     /* j-array */
3665     /*---------*/
3666     /*  post receives of j-array */
3667     for (i=0; i<nrecvs; i++){
3668       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3669       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3670     }
3671     k = 0;
3672     for (i=0; i<nsends; i++){
3673       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3674       bufJ = bufj+sstartsj[i];
3675       for (j=0; j<nrows; j++) {
3676         row  = srow[k++] + b->rowners[rank]; /* global row idx */
3677         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3678         for (l=0; l<ncols; l++){
3679           *bufJ++ = cols[l];
3680         }
3681         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3682       }
3683       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3684     }
3685 
3686     /* recvs and sends of j-array are completed */
3687     i = nrecvs;
3688     while (i--) {
3689       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3690     }
3691     if (nsends) {
3692       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3693     }
3694   } else if (scall == MAT_REUSE_MATRIX){
3695     sstartsj = *startsj;
3696     rstartsj = sstartsj + nsends +1;
3697     bufa     = *bufa_ptr;
3698     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3699     b_otha   = b_oth->a;
3700   } else {
3701     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3702   }
3703 
3704   /* a-array */
3705   /*---------*/
3706   /*  post receives of a-array */
3707   for (i=0; i<nrecvs; i++){
3708     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3709     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3710   }
3711   k = 0;
3712   for (i=0; i<nsends; i++){
3713     nrows = sstarts[i+1]-sstarts[i];
3714     bufA = bufa+sstartsj[i];
3715     for (j=0; j<nrows; j++) {
3716       row  = srow[k++] + b->rowners[rank]; /* global row idx */
3717       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3718       for (l=0; l<ncols; l++){
3719         *bufA++ = vals[l];
3720       }
3721       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3722 
3723     }
3724     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3725   }
3726   /* recvs and sends of a-array are completed */
3727   i = nrecvs;
3728   while (i--) {
3729     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3730   }
3731   if (nsends) {
3732     ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3733   }
3734 
3735   if (scall == MAT_INITIAL_MATRIX){
3736     /* put together the new matrix */
3737     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3738 
3739     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3740     /* Since these are PETSc arrays, change flags to free them as necessary. */
3741     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3742     b_oth->freedata = PETSC_TRUE;
3743     b_oth->nonew    = 0;
3744 
3745     ierr = PetscFree(bufj);CHKERRQ(ierr);
3746     if (!startsj || !bufa_ptr){
3747       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3748       ierr = PetscFree(bufa);CHKERRQ(ierr);
3749     } else {
3750       *startsj  = sstartsj;
3751       *bufa_ptr = bufa;
3752     }
3753   }
3754   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3755 
3756   PetscFunctionReturn(0);
3757 }
3758 
3759 /*MC
3760    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3761 
3762    Options Database Keys:
3763 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3764 
3765   Level: beginner
3766 
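  Example usage (a minimal sketch; comm, M and N are assumed to be an MPI communicator and the
  desired global dimensions, and the preallocation numbers are placeholders):
.vb
      Mat A;
      ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);  /* rough per-row preallocation */
.ve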
3767 .seealso: MatCreateMPIAIJ
3768 M*/
3769 
3770 EXTERN_C_BEGIN
3771 #undef __FUNCT__
3772 #define __FUNCT__ "MatCreate_MPIAIJ"
3773 PetscErrorCode MatCreate_MPIAIJ(Mat B)
3774 {
3775   Mat_MPIAIJ     *b;
3776   PetscErrorCode ierr;
3777   PetscInt       i;
3778   PetscMPIInt    size;
3779 
3780   PetscFunctionBegin;
3781   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3782 
3783   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3784   B->data         = (void*)b;
3785   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3786   B->factor       = 0;
3787   B->bs           = 1;
3788   B->assembled    = PETSC_FALSE;
3789   B->mapping      = 0;
3790 
3791   B->insertmode      = NOT_SET_VALUES;
3792   b->size            = size;
3793   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3794 
3795   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
3796   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
3797 
3798   /* the information in the maps duplicates the information computed below, eventually
3799      we should remove the duplicate information that is not contained in the maps */
3800   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
3801   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
3802 
3803   /* build local table of row and column ownerships */
3804   ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
3805   ierr = PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
3806   b->cowners = b->rowners + b->size + 2;
3807   ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3808   b->rowners[0] = 0;
3809   for (i=2; i<=b->size; i++) {
3810     b->rowners[i] += b->rowners[i-1];
3811   }
3812   b->rstart = b->rowners[b->rank];
3813   b->rend   = b->rowners[b->rank+1];
3814   ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3815   b->cowners[0] = 0;
3816   for (i=2; i<=b->size; i++) {
3817     b->cowners[i] += b->cowners[i-1];
3818   }
3819   b->cstart = b->cowners[b->rank];
3820   b->cend   = b->cowners[b->rank+1];
3821 
3822   /* build cache for off array entries formed */
3823   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3824   b->donotstash  = PETSC_FALSE;
3825   b->colmap      = 0;
3826   b->garray      = 0;
3827   b->roworiented = PETSC_TRUE;
3828 
3829   /* stuff used for matrix vector multiply */
3830   b->lvec      = PETSC_NULL;
3831   b->Mvctx     = PETSC_NULL;
3832 
3833   /* stuff for MatGetRow() */
3834   b->rowindices   = 0;
3835   b->rowvalues    = 0;
3836   b->getrowactive = PETSC_FALSE;
3837 
3838   /* Explicitly create 2 MATSEQAIJ matrices. */
3839   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
3840   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3841   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3842   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
3843   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3844   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3845 
3846   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3847                                      "MatStoreValues_MPIAIJ",
3848                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3849   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3850                                      "MatRetrieveValues_MPIAIJ",
3851                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3852   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3853 				     "MatGetDiagonalBlock_MPIAIJ",
3854                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3855   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3856 				     "MatIsTranspose_MPIAIJ",
3857 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3858   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3859 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3860 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3861   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3862 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3863 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
3864   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3865 				     "MatDiagonalScaleLocal_MPIAIJ",
3866 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
3867   PetscFunctionReturn(0);
3868 }
3869 EXTERN_C_END
3870