xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 5ef145eb8df80296d879d7eba3b3c32bcd261bc3)
1 
2 #include "src/mat/impls/aij/mpi/mpiaij.h"
3 #include "src/inline/spops.h"
4 
5 /*
6   Local utility routine that creates a mapping from the global column
7 number to the local number in the off-diagonal part of the local
8 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
9 a slightly higher hash table cost; without it, it is not scalable (each processor
10 has an order N integer array, but access is fast).
11 */
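/*
   Illustration (hypothetical values): if the off-diagonal block B on this process
   uses global columns garray = {3,17,42}, the loop below records colmap[3] = 1,
   colmap[17] = 2, colmap[42] = 3 (the PetscTable variant shifts the key by one,
   since zero cannot be used as a key).  A later lookup of a global column then
   recovers the local column in B after subtracting one, or a negative value when
   the column is not present on this process.
*/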
12 #undef __FUNCT__
13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
15 {
16   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
17   PetscErrorCode ierr;
18   PetscInt       n = aij->B->n,i;
19 
20   PetscFunctionBegin;
21 #if defined (PETSC_USE_CTABLE)
22   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
23   for (i=0; i<n; i++){
24     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
25   }
26 #else
27   ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
28   PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));
29   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
30   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
31 #endif
32   PetscFunctionReturn(0);
33 }
34 
35 #define CHUNKSIZE   15
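/*
   The two macros below are hand-inlined versions of MatSetValues_SeqAIJ() for the
   diagonal block (A) and the off-diagonal block (B): each binary-searches the row
   for the requested column, adds or overwrites the value depending on addv, and,
   when the row has no free slot left, reallocates the whole (i,j,a) storage of the
   block with CHUNKSIZE extra entries for that row.
*/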
36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
37 { \
38  \
39     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
40     rmax = aimax[row]; nrow = ailen[row];  \
41     col1 = col - shift; \
42      \
43     low = 0; high = nrow; \
44     while (high-low > 5) { \
45       t = (low+high)/2; \
46       if (rp[t] > col) high = t; \
47       else             low  = t; \
48     } \
49       for (_i=low; _i<high; _i++) { \
50         if (rp[_i] > col1) break; \
51         if (rp[_i] == col1) { \
52           if (addv == ADD_VALUES) ap[_i] += value;   \
53           else                  ap[_i] = value; \
54           goto a_noinsert; \
55         } \
56       }  \
57       if (nonew == 1) goto a_noinsert; \
58       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
59       if (nrow >= rmax) { \
60         /* there is no extra room in row, therefore enlarge */ \
61         PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
62         PetscScalar *new_a; \
63  \
64         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
65  \
66         /* malloc new storage space */ \
67         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
68         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
69         new_j   = (PetscInt*)(new_a + new_nz); \
70         new_i   = new_j + new_nz; \
71  \
72         /* copy over old data into new slots */ \
73         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
74         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
75         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
76         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
77         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
78                                                            len*sizeof(PetscInt));CHKERRQ(ierr); \
79         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
80         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
81                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
82         /* free up old matrix storage */ \
83  \
84         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
85         if (!a->singlemalloc) { \
86            ierr = PetscFree(a->i);CHKERRQ(ierr); \
87            ierr = PetscFree(a->j);CHKERRQ(ierr); \
88         } \
89         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
90         a->singlemalloc = PETSC_TRUE; \
91  \
92         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
93         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
94         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
95         a->maxnz += CHUNKSIZE; \
96         a->reallocs++; \
97       } \
98       N = nrow++ - 1; a->nz++; \
99       /* shift up all the later entries in this row */ \
100       for (ii=N; ii>=_i; ii--) { \
101         rp[ii+1] = rp[ii]; \
102         ap[ii+1] = ap[ii]; \
103       } \
104       rp[_i] = col1;  \
105       ap[_i] = value;  \
106       a_noinsert: ; \
107       ailen[row] = nrow; \
108 }
109 
110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
111 { \
112  \
113     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
114     rmax = bimax[row]; nrow = bilen[row];  \
115     col1 = col - shift; \
116      \
117     low = 0; high = nrow; \
118     while (high-low > 5) { \
119       t = (low+high)/2; \
120       if (rp[t] > col) high = t; \
121       else             low  = t; \
122     } \
123        for (_i=low; _i<high; _i++) { \
124         if (rp[_i] > col1) break; \
125         if (rp[_i] == col1) { \
126           if (addv == ADD_VALUES) ap[_i] += value;   \
127           else                  ap[_i] = value; \
128           goto b_noinsert; \
129         } \
130       }  \
131       if (nonew == 1) goto b_noinsert; \
132       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
133       if (nrow >= rmax) { \
134         /* there is no extra room in row, therefore enlarge */ \
135         PetscInt    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
136         PetscScalar *new_a; \
137  \
138         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
139  \
140         /* malloc new storage space */ \
141         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \
142         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
143         new_j   = (PetscInt*)(new_a + new_nz); \
144         new_i   = new_j + new_nz; \
145  \
146         /* copy over old data into new slots */ \
147         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
148         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
149         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
150         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
151         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
152                                                            len*sizeof(PetscInt));CHKERRQ(ierr); \
153         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
154         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
155                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
156         /* free up old matrix storage */ \
157  \
158         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
159         if (!b->singlemalloc) { \
160           ierr = PetscFree(b->i);CHKERRQ(ierr); \
161           ierr = PetscFree(b->j);CHKERRQ(ierr); \
162         } \
163         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
164         b->singlemalloc = PETSC_TRUE; \
165  \
166         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
167         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
168         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
169         b->maxnz += CHUNKSIZE; \
170         b->reallocs++; \
171       } \
172       N = nrow++ - 1; b->nz++; \
173       /* shift up all the later entries in this row */ \
174       for (ii=N; ii>=_i; ii--) { \
175         rp[ii+1] = rp[ii]; \
176         ap[ii+1] = ap[ii]; \
177       } \
178       rp[_i] = col1;  \
179       ap[_i] = value;  \
180       b_noinsert: ; \
181       bilen[row] = nrow; \
182 }
183 
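/*
   Entries whose row lies in [rstart,rend) go straight into the local diagonal
   block (columns in [cstart,cend)) or the off-diagonal block (all other columns,
   translated to local indices through colmap); entries for rows owned by other
   processes are stashed here and exchanged during MatAssemblyBegin/End.
*/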
184 #undef __FUNCT__
185 #define __FUNCT__ "MatSetValues_MPIAIJ"
186 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
187 {
188   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
189   PetscScalar    value;
190   PetscErrorCode ierr;
191   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
192   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
193   PetscTruth     roworiented = aij->roworiented;
194 
195   /* Some variables required in the macros */
196   Mat            A = aij->A;
197   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
198   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
199   PetscScalar    *aa = a->a;
200   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
201   Mat            B = aij->B;
202   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
203   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
204   PetscScalar    *ba = b->a;
205 
206   PetscInt       *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
207   PetscInt       nonew = a->nonew,shift=0;
208   PetscScalar    *ap;
209 
210   PetscFunctionBegin;
211   for (i=0; i<m; i++) {
212     if (im[i] < 0) continue;
213 #if defined(PETSC_USE_BOPT_g)
214     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
215 #endif
216     if (im[i] >= rstart && im[i] < rend) {
217       row = im[i] - rstart;
218       for (j=0; j<n; j++) {
219         if (in[j] >= cstart && in[j] < cend){
220           col = in[j] - cstart;
221           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
222           if (ignorezeroentries && value == 0.0) continue;
223           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
224           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
225         } else if (in[j] < 0) continue;
226 #if defined(PETSC_USE_BOPT_g)
227         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
228 #endif
229         else {
230           if (mat->was_assembled) {
231             if (!aij->colmap) {
232               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
233             }
234 #if defined (PETSC_USE_CTABLE)
235             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
236             col--;
237 #else
238             col = aij->colmap[in[j]] - 1;
239 #endif
240             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
241               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
242               col =  in[j];
243               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
244               B = aij->B;
245               b = (Mat_SeqAIJ*)B->data;
246               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
247               ba = b->a;
248             }
249           } else col = in[j];
250           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
251           if (ignorezeroentries && value == 0.0) continue;
252           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
253           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
254         }
255       }
256     } else {
257       if (!aij->donotstash) {
258         if (roworiented) {
259           if (ignorezeroentries && v[i*n] == 0.0) continue;
260           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
261         } else {
262           if (ignorezeroentries && v[i] == 0.0) continue;
263           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
264         }
265       }
266     }
267   }
268   PetscFunctionReturn(0);
269 }
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatGetValues_MPIAIJ"
273 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
274 {
275   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
276   PetscErrorCode ierr;
277   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
278   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
279 
280   PetscFunctionBegin;
281   for (i=0; i<m; i++) {
282     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
283     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
284     if (idxm[i] >= rstart && idxm[i] < rend) {
285       row = idxm[i] - rstart;
286       for (j=0; j<n; j++) {
287         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
288         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
289         if (idxn[j] >= cstart && idxn[j] < cend){
290           col = idxn[j] - cstart;
291           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
292         } else {
293           if (!aij->colmap) {
294             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
295           }
296 #if defined (PETSC_USE_CTABLE)
297           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
298           col --;
299 #else
300           col = aij->colmap[idxn[j]] - 1;
301 #endif
302           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
303           else {
304             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
305           }
306         }
307       }
308     } else {
309       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
310     }
311   }
312   PetscFunctionReturn(0);
313 }
314 
315 #undef __FUNCT__
316 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
317 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
318 {
319   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
320   PetscErrorCode ierr;
321   PetscInt       nstash,reallocs;
322   InsertMode     addv;
323 
324   PetscFunctionBegin;
325   if (aij->donotstash) {
326     PetscFunctionReturn(0);
327   }
328 
329   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
330   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
331   if (addv == (ADD_VALUES|INSERT_VALUES)) {
332     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
333   }
334   mat->insertmode = addv; /* in case this processor had no cache */
335 
336   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
337   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
338   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
339   PetscFunctionReturn(0);
340 }
341 
342 
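/*
   MatAssemblyEnd drains the stash: each received message is a list of
   (row,column,value) triples, which is fed back into MatSetValues_MPIAIJ one run
   of consecutive entries with the same row at a time, before the two sequential
   blocks themselves are assembled.
*/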
343 #undef __FUNCT__
344 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
345 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
346 {
347   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
348   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
349   PetscErrorCode ierr;
350   PetscMPIInt    n;
351   PetscInt       i,j,rstart,ncols,flg;
352   PetscInt       *row,*col,other_disassembled;
353   PetscScalar    *val;
354   InsertMode     addv = mat->insertmode;
355 
356   PetscFunctionBegin;
357   if (!aij->donotstash) {
358     while (1) {
359       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
360       if (!flg) break;
361 
362       for (i=0; i<n;) {
363         /* Now identify the consecutive vals belonging to the same row */
364         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
365         if (j < n) ncols = j-i;
366         else       ncols = n-i;
367         /* Now assemble all these values with a single function call */
368         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
369         i = j;
370       }
371     }
372     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
373   }
374 
375   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
376   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
377 
378   /* determine if any processor has disassembled; if so, we must
379      also disassemble ourselves so that we may reassemble. */
380   /*
381      if the nonzero structure of submatrix B cannot change, then we know that
382      no processor disassembled, so we can skip this step
383   */
384   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
385     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
386     if (mat->was_assembled && !other_disassembled) {
387       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
388       /* reaccess the b because aij->B was changed */
389       b    = (Mat_SeqAIJ *)aij->B->data;
390     }
391   }
392 
393   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
394     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
395   }
396   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
397   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
398 
399   if (aij->rowvalues) {
400     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
401     aij->rowvalues = 0;
402   }
403 
404   /* used by MatAXPY() */
405   a->xtoy = 0; b->xtoy = 0;
406   a->XtoY = 0; b->XtoY = 0;
407 
408   PetscFunctionReturn(0);
409 }
410 
411 #undef __FUNCT__
412 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
413 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
414 {
415   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
416   PetscErrorCode ierr;
417 
418   PetscFunctionBegin;
419   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
420   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
421   PetscFunctionReturn(0);
422 }
423 
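/*
   The index set passed to MatZeroRows may name rows owned by any process, so the
   routine below first routes each requested row to its owner (per-destination
   counts, a PetscMaxSum() sizing step, then point-to-point sends/receives) and
   only afterwards zeroes the local rows of the A and B blocks.
*/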
424 #undef __FUNCT__
425 #define __FUNCT__ "MatZeroRows_MPIAIJ"
426 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
427 {
428   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
429   PetscErrorCode ierr;
430   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag;
431   PetscInt       i,N,*rows,*owners = l->rowners;
432   PetscInt       *nprocs,j,idx,nsends,row;
433   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
434   PetscInt       *rvalues,count,base,slen,*source;
435   PetscInt       *lens,*lrows,*values,rstart=l->rstart;
436   MPI_Comm       comm = A->comm;
437   MPI_Request    *send_waits,*recv_waits;
438   MPI_Status     recv_status,*send_status;
439   IS             istmp;
440   PetscTruth     found;
441 
442   PetscFunctionBegin;
443   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
444   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
445 
446   /*  first count number of contributors to each processor */
447   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
448   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
449   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
450   for (i=0; i<N; i++) {
451     idx = rows[i];
452     found = PETSC_FALSE;
453     for (j=0; j<size; j++) {
454       if (idx >= owners[j] && idx < owners[j+1]) {
455         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
456       }
457     }
458     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
459   }
460   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
461 
462   /* inform other processors of number of messages and max length*/
463   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
464 
465   /* post receives:   */
466   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
467   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
468   for (i=0; i<nrecvs; i++) {
469     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
470   }
471 
472   /* do sends:
473       1) starts[i] gives the starting index in svalues for stuff going to
474          the ith processor
475   */
476   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
477   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
478   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
479   starts[0] = 0;
480   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
481   for (i=0; i<N; i++) {
482     svalues[starts[owner[i]]++] = rows[i];
483   }
484   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
485 
486   starts[0] = 0;
487   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
488   count = 0;
489   for (i=0; i<size; i++) {
490     if (nprocs[2*i+1]) {
491       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
492     }
493   }
494   ierr = PetscFree(starts);CHKERRQ(ierr);
495 
496   base = owners[rank];
497 
498   /*  wait on receives */
499   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
500   source = lens + nrecvs;
501   count  = nrecvs; slen = 0;
502   while (count) {
503     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
504     /* unpack receives into our local space */
505     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
506     source[imdex]  = recv_status.MPI_SOURCE;
507     lens[imdex]    = n;
508     slen          += n;
509     count--;
510   }
511   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
512 
513   /* move the data into the send scatter */
514   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
515   count = 0;
516   for (i=0; i<nrecvs; i++) {
517     values = rvalues + i*nmax;
518     for (j=0; j<lens[i]; j++) {
519       lrows[count++] = values[j] - base;
520     }
521   }
522   ierr = PetscFree(rvalues);CHKERRQ(ierr);
523   ierr = PetscFree(lens);CHKERRQ(ierr);
524   ierr = PetscFree(owner);CHKERRQ(ierr);
525   ierr = PetscFree(nprocs);CHKERRQ(ierr);
526 
527   /* actually zap the local rows */
528   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
529   PetscLogObjectParent(A,istmp);
530 
531   /*
532         Zero the required rows. If the "diagonal block" of the matrix
533      is square and the user wishes to set the diagonal, we use separate
534      code so that MatSetValues() is not called for each diagonal entry,
535      which would allocate new memory and slow things down with many mallocs.
536 
537        Contributed by: Mathew Knepley
538   */
539   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
540   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
541   if (diag && (l->A->M == l->A->N)) {
542     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
543   } else if (diag) {
544     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
545     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
546       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
547 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
548     }
549     for (i = 0; i < slen; i++) {
550       row  = lrows[i] + rstart;
551       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
552     }
553     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
554     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
555   } else {
556     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
557   }
558   ierr = ISDestroy(istmp);CHKERRQ(ierr);
559   ierr = PetscFree(lrows);CHKERRQ(ierr);
560 
561   /* wait on sends */
562   if (nsends) {
563     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
564     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
565     ierr = PetscFree(send_status);CHKERRQ(ierr);
566   }
567   ierr = PetscFree(send_waits);CHKERRQ(ierr);
568   ierr = PetscFree(svalues);CHKERRQ(ierr);
569 
570   PetscFunctionReturn(0);
571 }
572 
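/*
   MatMult computes yy = a->A*xx + a->B*lvec: the scatter of the needed ghost
   values of xx into a->lvec is started first, the multiply with the diagonal
   block overlaps that communication, and the off-diagonal contribution is added
   once the scatter has completed.
*/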
573 #undef __FUNCT__
574 #define __FUNCT__ "MatMult_MPIAIJ"
575 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
576 {
577   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
578   PetscErrorCode ierr;
579   PetscInt       nt;
580 
581   PetscFunctionBegin;
582   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
583   if (nt != A->n) {
584     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
585   }
586   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
587   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
588   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
589   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
590   PetscFunctionReturn(0);
591 }
592 
593 #undef __FUNCT__
594 #define __FUNCT__ "MatMultAdd_MPIAIJ"
595 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
596 {
597   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
598   PetscErrorCode ierr;
599 
600   PetscFunctionBegin;
601   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
602   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
603   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
604   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
605   PetscFunctionReturn(0);
606 }
607 
608 #undef __FUNCT__
609 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
610 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
611 {
612   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
613   PetscErrorCode ierr;
614 
615   PetscFunctionBegin;
616   /* do nondiagonal part */
617   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
618   /* send it on its way */
619   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
620   /* do local part */
621   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
622   /* receive remote parts: note this assumes the values are not actually */
623   /* inserted into yy until the next line, which is true for my implementation */
624   /* but may not always be true. */
625   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
626   PetscFunctionReturn(0);
627 }
628 
629 EXTERN_C_BEGIN
630 #undef __FUNCT__
631 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
632 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
633 {
634   MPI_Comm       comm;
635   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
636   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
637   IS             Me,Notme;
638   PetscErrorCode ierr;
639   PetscInt       M,N,first,last,*notme,i;
640   PetscMPIInt    size;
641 
642   PetscFunctionBegin;
643 
644   /* Easy test: symmetric diagonal block */
645   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
646   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
647   if (!*f) PetscFunctionReturn(0);
648   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
649   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
650   if (size == 1) PetscFunctionReturn(0);
651 
652   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
653   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
654   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
655   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
656   for (i=0; i<first; i++) notme[i] = i;
657   for (i=last; i<M; i++) notme[i-last+first] = i;
658   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
659   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
660   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
661   Aoff = Aoffs[0];
662   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
663   Boff = Boffs[0];
664   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
665   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
666   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
667   ierr = ISDestroy(Me);CHKERRQ(ierr);
668   ierr = ISDestroy(Notme);CHKERRQ(ierr);
669 
670   PetscFunctionReturn(0);
671 }
672 EXTERN_C_END
673 
674 #undef __FUNCT__
675 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
676 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
677 {
678   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
679   PetscErrorCode ierr;
680 
681   PetscFunctionBegin;
682   /* do nondiagonal part */
683   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
684   /* send it on its way */
685   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
686   /* do local part */
687   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
688   /* receive remote parts: note this assumes the values are not actually */
689   /* inserted into zz until the next line, which is true for my implementation */
690   /* but may not always be true. */
691   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
692   PetscFunctionReturn(0);
693 }
694 
695 /*
696   This only works correctly for square matrices where the subblock A->A is the
697    diagonal block
698 */
699 #undef __FUNCT__
700 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
701 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
702 {
703   PetscErrorCode ierr;
704   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
705 
706   PetscFunctionBegin;
707   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
708   if (a->rstart != a->cstart || a->rend != a->cend) {
709     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
710   }
711   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
712   PetscFunctionReturn(0);
713 }
714 
715 #undef __FUNCT__
716 #define __FUNCT__ "MatScale_MPIAIJ"
717 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
718 {
719   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
720   PetscErrorCode ierr;
721 
722   PetscFunctionBegin;
723   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
724   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
725   PetscFunctionReturn(0);
726 }
727 
728 #undef __FUNCT__
729 #define __FUNCT__ "MatDestroy_MPIAIJ"
730 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
731 {
732   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
733   PetscErrorCode ierr;
734 
735   PetscFunctionBegin;
736 #if defined(PETSC_USE_LOG)
737   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
738 #endif
739   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
740   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
741   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
742   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
743 #if defined (PETSC_USE_CTABLE)
744   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
745 #else
746   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
747 #endif
748   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
749   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
750   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
751   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
752   ierr = PetscFree(aij);CHKERRQ(ierr);
753 
754   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
755   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
756   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
757   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
758   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
759   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
760   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
761   PetscFunctionReturn(0);
762 }
763 
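/*
   Binary layout written below: a four-entry header (MAT_FILE_COOKIE, M, N, global
   number of nonzeros), then all row lengths, then all global column indices, then
   all values; process 0 writes its own portion and then receives and writes the
   portion of every other process in turn.
*/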
764 #undef __FUNCT__
765 #define __FUNCT__ "MatView_MPIAIJ_Binary"
766 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
767 {
768   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
769   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
770   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
771   PetscErrorCode    ierr;
772   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
773   int               fd;
774   PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
775   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
776   PetscScalar       *column_values;
777 
778   PetscFunctionBegin;
779   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
780   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
781   nz   = A->nz + B->nz;
782   if (!rank) {
783     header[0] = MAT_FILE_COOKIE;
784     header[1] = mat->M;
785     header[2] = mat->N;
786     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
787     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
788     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
789     /* get largest number of rows any processor has */
790     rlen = mat->m;
791     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
792     for (i=1; i<size; i++) {
793       rlen = PetscMax(rlen,range[i+1] - range[i]);
794     }
795   } else {
796     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
797     rlen = mat->m;
798   }
799 
800   /* load up the local row counts */
801   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
802   for (i=0; i<mat->m; i++) {
803     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
804   }
805 
806   /* store the row lengths to the file */
807   if (!rank) {
808     MPI_Status status;
809     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
810     for (i=1; i<size; i++) {
811       rlen = range[i+1] - range[i];
812       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
813       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
814     }
815   } else {
816     ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
817   }
818   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
819 
820   /* load up the local column indices */
821   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
822   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
823   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
824   cnt  = 0;
825   for (i=0; i<mat->m; i++) {
826     for (j=B->i[i]; j<B->i[i+1]; j++) {
827       if ( (col = garray[B->j[j]]) > cstart) break;
828       column_indices[cnt++] = col;
829     }
830     for (k=A->i[i]; k<A->i[i+1]; k++) {
831       column_indices[cnt++] = A->j[k] + cstart;
832     }
833     for (; j<B->i[i+1]; j++) {
834       column_indices[cnt++] = garray[B->j[j]];
835     }
836   }
837   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
838 
839   /* store the column indices to the file */
840   if (!rank) {
841     MPI_Status status;
842     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
843     for (i=1; i<size; i++) {
844       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
845       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
846       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
847       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
848     }
849   } else {
850     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
851     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
852   }
853   ierr = PetscFree(column_indices);CHKERRQ(ierr);
854 
855   /* load up the local column values */
856   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
857   cnt  = 0;
858   for (i=0; i<mat->m; i++) {
859     for (j=B->i[i]; j<B->i[i+1]; j++) {
860       if ( garray[B->j[j]] > cstart) break;
861       column_values[cnt++] = B->a[j];
862     }
863     for (k=A->i[i]; k<A->i[i+1]; k++) {
864       column_values[cnt++] = A->a[k];
865     }
866     for (; j<B->i[i+1]; j++) {
867       column_values[cnt++] = B->a[j];
868     }
869   }
870   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
871 
872   /* store the column values to the file */
873   if (!rank) {
874     MPI_Status status;
875     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
876     for (i=1; i<size; i++) {
877       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
878       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
879       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
880       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
881     }
882   } else {
883     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
884     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
885   }
886   ierr = PetscFree(column_values);CHKERRQ(ierr);
887   PetscFunctionReturn(0);
888 }
889 
890 #undef __FUNCT__
891 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
892 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
893 {
894   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
895   PetscErrorCode    ierr;
896   PetscMPIInt       rank = aij->rank,size = aij->size;
897   PetscTruth        isdraw,iascii,flg,isbinary;
898   PetscViewer       sviewer;
899   PetscViewerFormat format;
900 
901   PetscFunctionBegin;
902   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
903   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
904   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
905   if (iascii) {
906     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
907     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
908       MatInfo info;
909       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
910       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
911       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
912       if (flg) {
913         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
914 					      rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
915       } else {
916         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
917 		    rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
918       }
919       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
920       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
921       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
922       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
923       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
924       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
925       PetscFunctionReturn(0);
926     } else if (format == PETSC_VIEWER_ASCII_INFO) {
927       PetscFunctionReturn(0);
928     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
929       PetscFunctionReturn(0);
930     }
931   } else if (isbinary) {
932     if (size == 1) {
933       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
934       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
935     } else {
936       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
937     }
938     PetscFunctionReturn(0);
939   } else if (isdraw) {
940     PetscDraw  draw;
941     PetscTruth isnull;
942     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
943     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
944   }
945 
946   if (size == 1) {
947     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
948     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
949   } else {
950     /* assemble the entire matrix onto first processor. */
951     Mat         A;
952     Mat_SeqAIJ  *Aloc;
953     PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
954     PetscScalar *a;
955 
956     if (!rank) {
957       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
958     } else {
959       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
960     }
961     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
962     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
963     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
964     PetscLogObjectParent(mat,A);
965 
966     /* copy over the A part */
967     Aloc = (Mat_SeqAIJ*)aij->A->data;
968     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
969     row = aij->rstart;
970     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
971     for (i=0; i<m; i++) {
972       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
973       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
974     }
975     aj = Aloc->j;
976     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
977 
978     /* copy over the B part */
979     Aloc = (Mat_SeqAIJ*)aij->B->data;
980     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
981     row  = aij->rstart;
982     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
983     ct   = cols;
984     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
985     for (i=0; i<m; i++) {
986       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
987       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
988     }
989     ierr = PetscFree(ct);CHKERRQ(ierr);
990     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
991     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
992     /*
993        Everyone has to make this call in order to draw the matrix, since the graphics
994        waits are synchronized across all processors that share the PetscDraw object
995     */
996     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
997     if (!rank) {
998       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
999       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1000     }
1001     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1002     ierr = MatDestroy(A);CHKERRQ(ierr);
1003   }
1004   PetscFunctionReturn(0);
1005 }
1006 
1007 #undef __FUNCT__
1008 #define __FUNCT__ "MatView_MPIAIJ"
1009 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1010 {
1011   PetscErrorCode ierr;
1012   PetscTruth     iascii,isdraw,issocket,isbinary;
1013 
1014   PetscFunctionBegin;
1015   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1016   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1017   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1018   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1019   if (iascii || isdraw || isbinary || issocket) {
1020     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1021   } else {
1022     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1023   }
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 
1028 
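/*
   Only processor-local relaxation is supported: in each outer iteration the ghost
   values of xx are gathered into mat->lvec, the right-hand side is updated as
   bb1 = bb - B*lvec, and a sequential SOR sweep is applied to the diagonal block.
*/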
1029 #undef __FUNCT__
1030 #define __FUNCT__ "MatRelax_MPIAIJ"
1031 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1032 {
1033   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1034   PetscErrorCode ierr;
1035   Vec            bb1;
1036   PetscScalar    mone=-1.0;
1037 
1038   PetscFunctionBegin;
1039   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1040 
1041   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1042 
1043   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1044     if (flag & SOR_ZERO_INITIAL_GUESS) {
1045       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1046       its--;
1047     }
1048 
1049     while (its--) {
1050       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1051       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1052 
1053       /* update rhs: bb1 = bb - B*x */
1054       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1055       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1056 
1057       /* local sweep */
1058       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1059       CHKERRQ(ierr);
1060     }
1061   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1062     if (flag & SOR_ZERO_INITIAL_GUESS) {
1063       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1064       its--;
1065     }
1066     while (its--) {
1067       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1068       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1069 
1070       /* update rhs: bb1 = bb - B*x */
1071       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1072       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1073 
1074       /* local sweep */
1075       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1076       CHKERRQ(ierr);
1077     }
1078   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1079     if (flag & SOR_ZERO_INITIAL_GUESS) {
1080       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1081       its--;
1082     }
1083     while (its--) {
1084       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1085       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1086 
1087       /* update rhs: bb1 = bb - B*x */
1088       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1089       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1090 
1091       /* local sweep */
1092       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1093       CHKERRQ(ierr);
1094     }
1095   } else {
1096     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1097   }
1098 
1099   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1105 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1106 {
1107   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1108   Mat            A = mat->A,B = mat->B;
1109   PetscErrorCode ierr;
1110   PetscReal      isend[5],irecv[5];
1111 
1112   PetscFunctionBegin;
1113   info->block_size     = 1.0;
1114   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1115   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1116   isend[3] = info->memory;  isend[4] = info->mallocs;
1117   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1118   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1119   isend[3] += info->memory;  isend[4] += info->mallocs;
1120   if (flag == MAT_LOCAL) {
1121     info->nz_used      = isend[0];
1122     info->nz_allocated = isend[1];
1123     info->nz_unneeded  = isend[2];
1124     info->memory       = isend[3];
1125     info->mallocs      = isend[4];
1126   } else if (flag == MAT_GLOBAL_MAX) {
1127     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1128     info->nz_used      = irecv[0];
1129     info->nz_allocated = irecv[1];
1130     info->nz_unneeded  = irecv[2];
1131     info->memory       = irecv[3];
1132     info->mallocs      = irecv[4];
1133   } else if (flag == MAT_GLOBAL_SUM) {
1134     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1135     info->nz_used      = irecv[0];
1136     info->nz_allocated = irecv[1];
1137     info->nz_unneeded  = irecv[2];
1138     info->memory       = irecv[3];
1139     info->mallocs      = irecv[4];
1140   }
1141   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1142   info->fill_ratio_needed = 0;
1143   info->factor_mallocs    = 0;
1144   info->rows_global       = (double)matin->M;
1145   info->columns_global    = (double)matin->N;
1146   info->rows_local        = (double)matin->m;
1147   info->columns_local     = (double)matin->N;
1148 
1149   PetscFunctionReturn(0);
1150 }
1151 
1152 #undef __FUNCT__
1153 #define __FUNCT__ "MatSetOption_MPIAIJ"
1154 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1155 {
1156   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1157   PetscErrorCode ierr;
1158 
1159   PetscFunctionBegin;
1160   switch (op) {
1161   case MAT_NO_NEW_NONZERO_LOCATIONS:
1162   case MAT_YES_NEW_NONZERO_LOCATIONS:
1163   case MAT_COLUMNS_UNSORTED:
1164   case MAT_COLUMNS_SORTED:
1165   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1166   case MAT_KEEP_ZEROED_ROWS:
1167   case MAT_NEW_NONZERO_LOCATION_ERR:
1168   case MAT_USE_INODES:
1169   case MAT_DO_NOT_USE_INODES:
1170   case MAT_IGNORE_ZERO_ENTRIES:
1171     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1172     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1173     break;
1174   case MAT_ROW_ORIENTED:
1175     a->roworiented = PETSC_TRUE;
1176     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1177     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1178     break;
1179   case MAT_ROWS_SORTED:
1180   case MAT_ROWS_UNSORTED:
1181   case MAT_YES_NEW_DIAGONALS:
1182     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1183     break;
1184   case MAT_COLUMN_ORIENTED:
1185     a->roworiented = PETSC_FALSE;
1186     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1187     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1188     break;
1189   case MAT_IGNORE_OFF_PROC_ENTRIES:
1190     a->donotstash = PETSC_TRUE;
1191     break;
1192   case MAT_NO_NEW_DIAGONALS:
1193     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1194   case MAT_SYMMETRIC:
1195   case MAT_STRUCTURALLY_SYMMETRIC:
1196   case MAT_HERMITIAN:
1197   case MAT_SYMMETRY_ETERNAL:
1198     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1199     break;
1200   case MAT_NOT_SYMMETRIC:
1201   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1202   case MAT_NOT_HERMITIAN:
1203   case MAT_NOT_SYMMETRY_ETERNAL:
1204     break;
1205   default:
1206     SETERRQ(PETSC_ERR_SUP,"unknown option");
1207   }
1208   PetscFunctionReturn(0);
1209 }
1210 
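/*
   MatGetRow merges the requested local row of the diagonal block A and the
   off-diagonal block B into one list ordered by global column number, using the
   scratch arrays rowvalues/rowindices sized for the longest local row.
*/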
1211 #undef __FUNCT__
1212 #define __FUNCT__ "MatGetRow_MPIAIJ"
1213 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1214 {
1215   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1216   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1217   PetscErrorCode ierr;
1218   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1219   PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1220   PetscInt       *cmap,*idx_p;
1221 
1222   PetscFunctionBegin;
1223   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1224   mat->getrowactive = PETSC_TRUE;
1225 
1226   if (!mat->rowvalues && (idx || v)) {
1227     /*
1228         allocate enough space to hold information from the longest row.
1229     */
1230     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1231     PetscInt     max = 1,tmp;
1232     for (i=0; i<matin->m; i++) {
1233       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1234       if (max < tmp) { max = tmp; }
1235     }
1236     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1237     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1238   }
1239 
1240   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1241   lrow = row - rstart;
1242 
1243   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1244   if (!v)   {pvA = 0; pvB = 0;}
1245   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1246   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1247   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1248   nztot = nzA + nzB;
1249 
1250   cmap  = mat->garray;
1251   if (v  || idx) {
1252     if (nztot) {
1253       /* Sort by increasing column numbers, assuming A and B already sorted */
1254       PetscInt imark = -1;
1255       if (v) {
1256         *v = v_p = mat->rowvalues;
1257         for (i=0; i<nzB; i++) {
1258           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1259           else break;
1260         }
1261         imark = i;
1262         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1263         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1264       }
1265       if (idx) {
1266         *idx = idx_p = mat->rowindices;
1267         if (imark > -1) {
1268           for (i=0; i<imark; i++) {
1269             idx_p[i] = cmap[cworkB[i]];
1270           }
1271         } else {
1272           for (i=0; i<nzB; i++) {
1273             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1274             else break;
1275           }
1276           imark = i;
1277         }
1278         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1279         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1280       }
1281     } else {
1282       if (idx) *idx = 0;
1283       if (v)   *v   = 0;
1284     }
1285   }
1286   *nz = nztot;
1287   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1288   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1289   PetscFunctionReturn(0);
1290 }
1291 
1292 #undef __FUNCT__
1293 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1294 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1295 {
1296   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1297 
1298   PetscFunctionBegin;
1299   if (aij->getrowactive == PETSC_FALSE) {
1300     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1301   }
1302   aij->getrowactive = PETSC_FALSE;
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 #undef __FUNCT__
1307 #define __FUNCT__ "MatNorm_MPIAIJ"
1308 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1309 {
1310   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1311   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1312   PetscErrorCode ierr;
1313   PetscInt       i,j,cstart = aij->cstart;
1314   PetscReal      sum = 0.0;
1315   PetscScalar    *v;
1316 
1317   PetscFunctionBegin;
1318   if (aij->size == 1) {
1319     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1320   } else {
1321     if (type == NORM_FROBENIUS) {
1322       v = amat->a;
1323       for (i=0; i<amat->nz; i++) {
1324 #if defined(PETSC_USE_COMPLEX)
1325         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1326 #else
1327         sum += (*v)*(*v); v++;
1328 #endif
1329       }
1330       v = bmat->a;
1331       for (i=0; i<bmat->nz; i++) {
1332 #if defined(PETSC_USE_COMPLEX)
1333         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1334 #else
1335         sum += (*v)*(*v); v++;
1336 #endif
1337       }
1338       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1339       *norm = sqrt(*norm);
1340     } else if (type == NORM_1) { /* max column norm */
1341       PetscReal *tmp,*tmp2;
1342       PetscInt    *jj,*garray = aij->garray;
1343       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1344       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1345       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1346       *norm = 0.0;
1347       v = amat->a; jj = amat->j;
1348       for (j=0; j<amat->nz; j++) {
1349         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1350       }
1351       v = bmat->a; jj = bmat->j;
1352       for (j=0; j<bmat->nz; j++) {
1353         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1354       }
1355       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1356       for (j=0; j<mat->N; j++) {
1357         if (tmp2[j] > *norm) *norm = tmp2[j];
1358       }
1359       ierr = PetscFree(tmp);CHKERRQ(ierr);
1360       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1361     } else if (type == NORM_INFINITY) { /* max row norm */
1362       PetscReal ntemp = 0.0;
1363       for (j=0; j<aij->A->m; j++) {
1364         v = amat->a + amat->i[j];
1365         sum = 0.0;
1366         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1367           sum += PetscAbsScalar(*v); v++;
1368         }
1369         v = bmat->a + bmat->i[j];
1370         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1371           sum += PetscAbsScalar(*v); v++;
1372         }
1373         if (sum > ntemp) ntemp = sum;
1374       }
1375       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1376     } else {
1377       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1378     }
1379   }
1380   PetscFunctionReturn(0);
1381 }
1382 
1383 #undef __FUNCT__
1384 #define __FUNCT__ "MatTranspose_MPIAIJ"
1385 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1386 {
1387   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1388   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1389   PetscErrorCode ierr;
1390   PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1391   Mat            B;
1392   PetscScalar    *array;
1393 
1394   PetscFunctionBegin;
1395   if (!matout && M != N) {
1396     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1397   }
1398 
1399   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1400   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1401   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1402 
1403   /* copy over the A part */
1404   Aloc = (Mat_SeqAIJ*)a->A->data;
1405   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1406   row = a->rstart;
1407   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1408   for (i=0; i<m; i++) {
1409     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1410     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1411   }
1412   aj = Aloc->j;
1413   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1414 
1415   /* copy over the B part */
1416   Aloc = (Mat_SeqAIJ*)a->B->data;
1417   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1418   row  = a->rstart;
1419   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1420   ct   = cols;
1421   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1422   for (i=0; i<m; i++) {
1423     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1424     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1425   }
1426   ierr = PetscFree(ct);CHKERRQ(ierr);
1427   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1428   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1429   if (matout) {
1430     *matout = B;
1431   } else {
1432     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1433   }
1434   PetscFunctionReturn(0);
1435 }
1436 
1437 #undef __FUNCT__
1438 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1439 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1440 {
1441   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1442   Mat            a = aij->A,b = aij->B;
1443   PetscErrorCode ierr;
1444   PetscInt       s1,s2,s3;
1445 
1446   PetscFunctionBegin;
1447   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1448   if (rr) {
1449     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1450     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1451     /* Overlap communication with computation. */
1452     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1453   }
1454   if (ll) {
1455     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1456     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1457     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1458   }
1459   /* scale  the diagonal block */
1460   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1461 
1462   if (rr) {
1463     /* Do a scatter end and then right scale the off-diagonal block */
1464     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1465     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1466   }
1467 
1468   PetscFunctionReturn(0);
1469 }
1470 
1471 
1472 #undef __FUNCT__
1473 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1474 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1475 {
1476   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1477   PetscErrorCode ierr;
1478 
1479   PetscFunctionBegin;
1480   if (!a->rank) {
1481     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1482   }
1483   PetscFunctionReturn(0);
1484 }
1485 
1486 #undef __FUNCT__
1487 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1488 PetscErrorCode MatGetBlockSize_MPIAIJ(Mat A,PetscInt *bs)
1489 {
1490   PetscFunctionBegin;
1491   *bs = 1;
1492   PetscFunctionReturn(0);
1493 }
1494 #undef __FUNCT__
1495 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1496 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1497 {
1498   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1499   PetscErrorCode ierr;
1500 
1501   PetscFunctionBegin;
1502   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1503   PetscFunctionReturn(0);
1504 }
1505 
1506 #undef __FUNCT__
1507 #define __FUNCT__ "MatEqual_MPIAIJ"
1508 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1509 {
1510   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1511   Mat            a,b,c,d;
1512   PetscTruth     flg;
1513   PetscErrorCode ierr;
1514 
1515   PetscFunctionBegin;
1516   a = matA->A; b = matA->B;
1517   c = matB->A; d = matB->B;
1518 
1519   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1520   if (flg == PETSC_TRUE) {
1521     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1522   }
1523   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1524   PetscFunctionReturn(0);
1525 }
1526 
1527 #undef __FUNCT__
1528 #define __FUNCT__ "MatCopy_MPIAIJ"
1529 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1530 {
1531   PetscErrorCode ierr;
1532   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1533   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1534 
1535   PetscFunctionBegin;
1536   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1537   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1538     /* because of the column compression in the off-processor part of the matrix a->B,
1539        the number of columns in a->B and b->B may be different, hence we cannot call
1540        MatCopy() directly on the two parts. If need be, a copy more efficient than
1541        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
1542        then copying the submatrices */
1543     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1544   } else {
1545     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1546     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1547   }
1548   PetscFunctionReturn(0);
1549 }
1550 
1551 #undef __FUNCT__
1552 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1553 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1554 {
1555   PetscErrorCode ierr;
1556 
1557   PetscFunctionBegin;
1558   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1559   PetscFunctionReturn(0);
1560 }
1561 
1562 #include "petscblaslapack.h"
1563 #undef __FUNCT__
1564 #define __FUNCT__ "MatAXPY_MPIAIJ"
1565 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1566 {
1567   PetscErrorCode ierr;
1568   PetscInt       i;
1569   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1570   PetscBLASInt   bnz,one=1;
1571   Mat_SeqAIJ     *x,*y;
1572 
1573   PetscFunctionBegin;
1574   if (str == SAME_NONZERO_PATTERN) {
1575     x = (Mat_SeqAIJ *)xx->A->data;
1576     y = (Mat_SeqAIJ *)yy->A->data;
1577     bnz = (PetscBLASInt)x->nz;
1578     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1579     x = (Mat_SeqAIJ *)xx->B->data;
1580     y = (Mat_SeqAIJ *)yy->B->data;
1581     bnz = (PetscBLASInt)x->nz;
1582     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1583   } else if (str == SUBSET_NONZERO_PATTERN) {
1584     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1585 
1586     x = (Mat_SeqAIJ *)xx->B->data;
1587     y = (Mat_SeqAIJ *)yy->B->data;
1588     if (y->xtoy && y->XtoY != xx->B) {
1589       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1590       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1591     }
1592     if (!y->xtoy) { /* get xtoy */
1593       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1594       y->XtoY = xx->B;
1595     }
1596     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1597   } else {
1598     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1599   }
1600   PetscFunctionReturn(0);
1601 }
1602 
1603 /* -------------------------------------------------------------------*/
1604 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1605        MatGetRow_MPIAIJ,
1606        MatRestoreRow_MPIAIJ,
1607        MatMult_MPIAIJ,
1608 /* 4*/ MatMultAdd_MPIAIJ,
1609        MatMultTranspose_MPIAIJ,
1610        MatMultTransposeAdd_MPIAIJ,
1611        0,
1612        0,
1613        0,
1614 /*10*/ 0,
1615        0,
1616        0,
1617        MatRelax_MPIAIJ,
1618        MatTranspose_MPIAIJ,
1619 /*15*/ MatGetInfo_MPIAIJ,
1620        MatEqual_MPIAIJ,
1621        MatGetDiagonal_MPIAIJ,
1622        MatDiagonalScale_MPIAIJ,
1623        MatNorm_MPIAIJ,
1624 /*20*/ MatAssemblyBegin_MPIAIJ,
1625        MatAssemblyEnd_MPIAIJ,
1626        0,
1627        MatSetOption_MPIAIJ,
1628        MatZeroEntries_MPIAIJ,
1629 /*25*/ MatZeroRows_MPIAIJ,
1630 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_64BIT_INT)
1631        MatLUFactorSymbolic_MPIAIJ_TFS,
1632 #else
1633        0,
1634 #endif
1635        0,
1636        0,
1637        0,
1638 /*30*/ MatSetUpPreallocation_MPIAIJ,
1639        0,
1640        0,
1641        0,
1642        0,
1643 /*35*/ MatDuplicate_MPIAIJ,
1644        0,
1645        0,
1646        0,
1647        0,
1648 /*40*/ MatAXPY_MPIAIJ,
1649        MatGetSubMatrices_MPIAIJ,
1650        MatIncreaseOverlap_MPIAIJ,
1651        MatGetValues_MPIAIJ,
1652        MatCopy_MPIAIJ,
1653 /*45*/ MatPrintHelp_MPIAIJ,
1654        MatScale_MPIAIJ,
1655        0,
1656        0,
1657        0,
1658 /*50*/ MatGetBlockSize_MPIAIJ,
1659        0,
1660        0,
1661        0,
1662        0,
1663 /*55*/ MatFDColoringCreate_MPIAIJ,
1664        0,
1665        MatSetUnfactored_MPIAIJ,
1666        0,
1667        0,
1668 /*60*/ MatGetSubMatrix_MPIAIJ,
1669        MatDestroy_MPIAIJ,
1670        MatView_MPIAIJ,
1671        MatGetPetscMaps_Petsc,
1672        0,
1673 /*65*/ 0,
1674        0,
1675        0,
1676        0,
1677        0,
1678 /*70*/ 0,
1679        0,
1680        MatSetColoring_MPIAIJ,
1681        MatSetValuesAdic_MPIAIJ,
1682        MatSetValuesAdifor_MPIAIJ,
1683 /*75*/ 0,
1684        0,
1685        0,
1686        0,
1687        0,
1688 /*80*/ 0,
1689        0,
1690        0,
1691        0,
1692 /*84*/ MatLoad_MPIAIJ,
1693        0,
1694        0,
1695        0,
1696        0,
1697        0,
1698 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1699        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1700        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1701        MatPtAP_MPIAIJ_MPIAIJ,
1702        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1703 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
1704        0,
1705        0,
1706        0};
1707 
1708 /* ----------------------------------------------------------------------------------------*/
1709 
1710 EXTERN_C_BEGIN
1711 #undef __FUNCT__
1712 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1713 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
1714 {
1715   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1716   PetscErrorCode ierr;
1717 
1718   PetscFunctionBegin;
1719   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1720   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1721   PetscFunctionReturn(0);
1722 }
1723 EXTERN_C_END
1724 
1725 EXTERN_C_BEGIN
1726 #undef __FUNCT__
1727 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1728 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
1729 {
1730   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1731   PetscErrorCode ierr;
1732 
1733   PetscFunctionBegin;
1734   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1735   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1736   PetscFunctionReturn(0);
1737 }
1738 EXTERN_C_END
1739 
1740 #include "petscpc.h"
1741 EXTERN_C_BEGIN
1742 #undef __FUNCT__
1743 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1744 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1745 {
1746   Mat_MPIAIJ     *b;
1747   PetscErrorCode ierr;
1748   PetscInt       i;
1749 
1750   PetscFunctionBegin;
1751   B->preallocated = PETSC_TRUE;
1752   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1753   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1754   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1755   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1756   if (d_nnz) {
1757     for (i=0; i<B->m; i++) {
1758       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1759     }
1760   }
1761   if (o_nnz) {
1762     for (i=0; i<B->m; i++) {
1763       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1764     }
1765   }
1766   b = (Mat_MPIAIJ*)B->data;
1767   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1768   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1769 
1770   PetscFunctionReturn(0);
1771 }
1772 EXTERN_C_END
1773 
1774 /*MC
1775    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
1776 
1777    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
1778    and MATMPIAIJ otherwise.  As a result, for single process communicators,
1779    MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
1780    for communicators controlling multiple processes.  It is recommended that you call both of
1781    the above preallocation routines for simplicity.
1782 
1783    Options Database Keys:
1784 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
1785 
1786   Level: beginner
1787 
1788 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ
1789 M*/
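/*
   Usage sketch (not part of the original source): creating an AIJ matrix and calling
   both preallocation routines, as recommended above, so the same code works whether
   the communicator has one process or many.  The sizes and nonzero estimates below
   are illustrative only.

     Mat A;
     ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

   Only the preallocation call matching the actual matrix type takes effect; the other
   finds no composed function and silently does nothing, which is why calling both is safe.
*/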
1790 
1791 EXTERN_C_BEGIN
1792 #undef __FUNCT__
1793 #define __FUNCT__ "MatCreate_AIJ"
1794 PetscErrorCode MatCreate_AIJ(Mat A)
1795 {
1796   PetscErrorCode ierr;
1797   PetscMPIInt    size;
1798 
1799   PetscFunctionBegin;
1800   ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr);
1801   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1802   if (size == 1) {
1803     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1804   } else {
1805     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1806   }
1807   PetscFunctionReturn(0);
1808 }
1809 EXTERN_C_END
1810 
1811 #undef __FUNCT__
1812 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1813 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1814 {
1815   Mat            mat;
1816   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1817   PetscErrorCode ierr;
1818 
1819   PetscFunctionBegin;
1820   *newmat       = 0;
1821   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1822   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1823   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1824   a    = (Mat_MPIAIJ*)mat->data;
1825 
1826   mat->factor       = matin->factor;
1827   mat->assembled    = PETSC_TRUE;
1828   mat->insertmode   = NOT_SET_VALUES;
1829   mat->preallocated = PETSC_TRUE;
1830 
1831   a->rstart       = oldmat->rstart;
1832   a->rend         = oldmat->rend;
1833   a->cstart       = oldmat->cstart;
1834   a->cend         = oldmat->cend;
1835   a->size         = oldmat->size;
1836   a->rank         = oldmat->rank;
1837   a->donotstash   = oldmat->donotstash;
1838   a->roworiented  = oldmat->roworiented;
1839   a->rowindices   = 0;
1840   a->rowvalues    = 0;
1841   a->getrowactive = PETSC_FALSE;
1842 
1843   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1844   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1845   if (oldmat->colmap) {
1846 #if defined (PETSC_USE_CTABLE)
1847     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1848 #else
1849     ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1850     PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));
1851     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1852 #endif
1853   } else a->colmap = 0;
1854   if (oldmat->garray) {
1855     PetscInt len;
1856     len  = oldmat->B->n;
1857     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1858     PetscLogObjectMemory(mat,len*sizeof(PetscInt));
1859     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1860   } else a->garray = 0;
1861 
1862   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1863   PetscLogObjectParent(mat,a->lvec);
1864   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1865   PetscLogObjectParent(mat,a->Mvctx);
1866   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1867   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1868   PetscLogObjectParent(mat,a->A);
1869   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1870   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1871   PetscLogObjectParent(mat,a->B);
1872   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1873   *newmat = mat;
1874   PetscFunctionReturn(0);
1875 }
1876 
1877 #include "petscsys.h"
1878 
1879 #undef __FUNCT__
1880 #define __FUNCT__ "MatLoad_MPIAIJ"
1881 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1882 {
1883   Mat            A;
1884   PetscScalar    *vals,*svals;
1885   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1886   MPI_Status     status;
1887   PetscErrorCode ierr;
1888   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*rowners,maxnz,mm;
1889   PetscInt       i,nz,j,rstart,rend;
1890   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1891   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1892   PetscInt       cend,cstart,n;
1893   int            fd;
1894 
1895   PetscFunctionBegin;
1896   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1897   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1898   if (!rank) {
1899     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1900     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1901     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1902     if (header[3] < 0) {
1903       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1904     }
1905   }
1906 
1907   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1908   M = header[1]; N = header[2];
1909   /* determine ownership of all rows */
1910   m = M/size + ((M % size) > rank);
1911   mm = (PetscMPIInt) m;
1912   ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1913   ierr = MPI_Allgather(&mm,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1914   rowners[0] = 0;
1915   for (i=2; i<=size; i++) {
1916     rowners[i] += rowners[i-1];
1917   }
1918   rstart = rowners[rank];
1919   rend   = rowners[rank+1];
1920 
1921   /* distribute row lengths to all processors */
1922   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
1923   offlens = ourlens + (rend-rstart);
1924   if (!rank) {
1925     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1926     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1927     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1928     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1929     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1930     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1931   } else {
1932     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1933   }
1934 
1935   if (!rank) {
1936     /* calculate the number of nonzeros on each processor */
1937     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1938     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1939     for (i=0; i<size; i++) {
1940       for (j=rowners[i]; j< rowners[i+1]; j++) {
1941         procsnz[i] += rowlengths[j];
1942       }
1943     }
1944     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1945 
1946     /* determine max buffer needed and allocate it */
1947     maxnz = 0;
1948     for (i=0; i<size; i++) {
1949       maxnz = PetscMax(maxnz,procsnz[i]);
1950     }
1951     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1952 
1953     /* read in my part of the matrix column indices  */
1954     nz   = procsnz[0];
1955     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1956     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1957 
1958     /* read in everyone else's parts and ship them off */
1959     for (i=1; i<size; i++) {
1960       nz   = procsnz[i];
1961       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1962       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1963     }
1964     ierr = PetscFree(cols);CHKERRQ(ierr);
1965   } else {
1966     /* determine buffer space needed for message */
1967     nz = 0;
1968     for (i=0; i<m; i++) {
1969       nz += ourlens[i];
1970     }
1971     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1972 
1973     /* receive message of column indices */
1974     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1975     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1976     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1977   }
1978 
1979   /* determine column ownership if matrix is not square */
1980   if (N != M) {
1981     n      = N/size + ((N % size) > rank);
1982     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1983     cstart = cend - n;
1984   } else {
1985     cstart = rstart;
1986     cend   = rend;
1987     n      = cend - cstart;
1988   }
1989 
1990   /* loop over local rows, determining number of off diagonal entries */
1991   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1992   jj = 0;
1993   for (i=0; i<m; i++) {
1994     for (j=0; j<ourlens[i]; j++) {
1995       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1996       jj++;
1997     }
1998   }
1999 
2000   /* create our matrix */
2001   for (i=0; i<m; i++) {
2002     ourlens[i] -= offlens[i];
2003   }
2004   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2005   ierr = MatSetType(A,type);CHKERRQ(ierr);
2006   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2007 
2008   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2009   for (i=0; i<m; i++) {
2010     ourlens[i] += offlens[i];
2011   }
2012 
2013   if (!rank) {
2014     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2015 
2016     /* read in my part of the matrix numerical values  */
2017     nz   = procsnz[0];
2018     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2019 
2020     /* insert into matrix */
2021     jj      = rstart;
2022     smycols = mycols;
2023     svals   = vals;
2024     for (i=0; i<m; i++) {
2025       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2026       smycols += ourlens[i];
2027       svals   += ourlens[i];
2028       jj++;
2029     }
2030 
2031     /* read in other processors and ship out */
2032     for (i=1; i<size; i++) {
2033       nz   = procsnz[i];
2034       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2035       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2036     }
2037     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2038   } else {
2039     /* receive numeric values */
2040     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2041 
2042     /* receive message of values */
2043     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2044     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2045     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2046 
2047     /* insert into matrix */
2048     jj      = rstart;
2049     smycols = mycols;
2050     svals   = vals;
2051     for (i=0; i<m; i++) {
2052       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2053       smycols += ourlens[i];
2054       svals   += ourlens[i];
2055       jj++;
2056     }
2057   }
2058   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2059   ierr = PetscFree(vals);CHKERRQ(ierr);
2060   ierr = PetscFree(mycols);CHKERRQ(ierr);
2061   ierr = PetscFree(rowners);CHKERRQ(ierr);
2062 
2063   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2064   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2065   *newmat = A;
2066   PetscFunctionReturn(0);
2067 }
2068 
2069 #undef __FUNCT__
2070 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2071 /*
2072     Not great since it makes two copies of the submatrix: first a SeqAIJ copy
2073   locally, and then the end result by concatenating the local matrices.
2074   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2075 */
2076 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2077 {
2078   PetscErrorCode ierr;
2079   PetscMPIInt    rank,size;
2080   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2081   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2082   Mat            *local,M,Mreuse;
2083   PetscScalar    *vwork,*aa;
2084   MPI_Comm       comm = mat->comm;
2085   Mat_SeqAIJ     *aij;
2086 
2087 
2088   PetscFunctionBegin;
2089   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2090   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2091 
2092   if (call ==  MAT_REUSE_MATRIX) {
2093     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2094     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2095     local = &Mreuse;
2096     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2097   } else {
2098     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2099     Mreuse = *local;
2100     ierr   = PetscFree(local);CHKERRQ(ierr);
2101   }
2102 
2103   /*
2104       m - number of local rows
2105       n - number of columns (same on all processors)
2106       rstart - first row in new global matrix generated
2107   */
2108   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2109   if (call == MAT_INITIAL_MATRIX) {
2110     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2111     ii  = aij->i;
2112     jj  = aij->j;
2113 
2114     /*
2115         Determine the number of non-zeros in the diagonal and off-diagonal
2116         portions of the matrix in order to do correct preallocation
2117     */
2118 
2119     /* first get start and end of "diagonal" columns */
2120     if (csize == PETSC_DECIDE) {
2121       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2122       if (mglobal == n) { /* square matrix */
2123         nlocal = m;
2124       } else {
2125         nlocal = n/size + ((n % size) > rank);
2126       }
2127     } else {
2128       nlocal = csize;
2129     }
2130     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2131     rstart = rend - nlocal;
2132     if (rank == size - 1 && rend != n) {
2133       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2134     }
2135 
2136     /* next, compute all the lengths */
2137     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2138     olens = dlens + m;
2139     for (i=0; i<m; i++) {
2140       jend = ii[i+1] - ii[i];
2141       olen = 0;
2142       dlen = 0;
2143       for (j=0; j<jend; j++) {
2144         if (*jj < rstart || *jj >= rend) olen++;
2145         else dlen++;
2146         jj++;
2147       }
2148       olens[i] = olen;
2149       dlens[i] = dlen;
2150     }
2151     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2152     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2153     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2154     ierr = PetscFree(dlens);CHKERRQ(ierr);
2155   } else {
2156     PetscInt ml,nl;
2157 
2158     M = *newmat;
2159     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2160     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2161     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2162     /*
2163          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2164        rather than the slower MatSetValues().
2165     */
2166     M->was_assembled = PETSC_TRUE;
2167     M->assembled     = PETSC_FALSE;
2168   }
2169   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2170   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2171   ii  = aij->i;
2172   jj  = aij->j;
2173   aa  = aij->a;
2174   for (i=0; i<m; i++) {
2175     row   = rstart + i;
2176     nz    = ii[i+1] - ii[i];
2177     cwork = jj;     jj += nz;
2178     vwork = aa;     aa += nz;
2179     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2180   }
2181 
2182   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2183   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2184   *newmat = M;
2185 
2186   /* save submatrix used in processor for next request */
2187   if (call ==  MAT_INITIAL_MATRIX) {
2188     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2189     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2190   }
2191 
2192   PetscFunctionReturn(0);
2193 }
2194 
2195 EXTERN_C_BEGIN
2196 #undef __FUNCT__
2197 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2198 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2199 {
2200   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2201   PetscInt       m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2202   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2203   const PetscInt *JJ;
2204   PetscScalar    *values;
2205   PetscErrorCode ierr;
2206 
2207   PetscFunctionBegin;
2208 #if defined(PETSC_OPT_g)
2209   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0, it is %D",I[0]);
2210 #endif
2211   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2212   o_nnz = d_nnz + m;
2213 
2214   for (i=0; i<m; i++) {
2215     nnz     = I[i+1]- I[i];
2216     JJ      = J + I[i];
2217     nnz_max = PetscMax(nnz_max,nnz);
2218 #if defined(PETSC_OPT_g)
2219     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz);
2220 #endif
2221     for (j=0; j<nnz; j++) {
2222       if (*JJ >= cstart) break;
2223       JJ++;
2224     }
2225     d = 0;
2226     for (; j<nnz; j++) {
2227       if (*JJ++ >= cend) break;
2228       d++;
2229     }
2230     d_nnz[i] = d;
2231     o_nnz[i] = nnz - d;
2232   }
2233   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2234   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2235 
2236   if (v) values = (PetscScalar*)v;
2237   else {
2238     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2239     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2240   }
2241 
2242   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2243   for (i=0; i<m; i++) {
2244     ii   = i + rstart;
2245     nnz  = I[i+1]- I[i];
2246     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr);
2247   }
2248   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2249   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2250   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2251 
2252   if (!v) {
2253     ierr = PetscFree(values);CHKERRQ(ierr);
2254   }
2255   PetscFunctionReturn(0);
2256 }
2257 EXTERN_C_END
2258 
2259 #undef __FUNCT__
2260 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2261 /*@C
2262    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2263    (the default parallel PETSc format).
2264 
2265    Collective on MPI_Comm
2266 
2267    Input Parameters:
2268 +  A - the matrix
2269 .  i - the indices into j for the start of each local row (starts with zero)
2270 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2271 -  v - optional values in the matrix
2272 
2273    Level: developer
2274 
2275 .keywords: matrix, aij, compressed row, sparse, parallel
2276 
2277 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2278 @*/
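/*
   Example sketch (not part of the original source): preallocating from local CSR
   arrays.  The numbers are purely illustrative; i[] has one entry per local row plus
   one, starting at 0, and j[] holds the sorted global column indices of each row.
   B is assumed to have already been created as an MPIAIJ matrix with its sizes set.

     PetscInt    i[] = {0,2,4};              two local rows, two nonzeros each
     PetscInt    j[] = {0,5,1,6};            global column indices, sorted per row
     PetscScalar v[] = {1.0,2.0,3.0,4.0};    optional; may be PETSC_NULL
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
*/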
2279 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2280 {
2281   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2282 
2283   PetscFunctionBegin;
2284   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2285   if (f) {
2286     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2287   }
2288   PetscFunctionReturn(0);
2289 }
2290 
2291 #undef __FUNCT__
2292 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2293 /*@C
2294    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2295    (the default parallel PETSc format).  For good matrix assembly performance
2296    the user should preallocate the matrix storage by setting the parameters
2297    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2298    performance can be increased by more than a factor of 50.
2299 
2300    Collective on MPI_Comm
2301 
2302    Input Parameters:
2303 +  A - the matrix
2304 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2305            (same value is used for all local rows)
2306 .  d_nnz - array containing the number of nonzeros in the various rows of the
2307            DIAGONAL portion of the local submatrix (possibly different for each row)
2308            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2309            The size of this array is equal to the number of local rows, i.e 'm'.
2310            You must leave room for the diagonal entry even if it is zero.
2311 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2312            submatrix (same value is used for all local rows).
2313 -  o_nnz - array containing the number of nonzeros in the various rows of the
2314            OFF-DIAGONAL portion of the local submatrix (possibly different for
2315            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2316            structure. The size of this array is equal to the number
2317            of local rows, i.e 'm'.
2318 
2319    The AIJ format (also called the Yale sparse matrix format or
2320    compressed row storage (CSR)) is fully compatible with standard Fortran 77
2321    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2322 
2323    The parallel matrix is partitioned such that the first m0 rows belong to
2324    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2325    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2326 
2327    The DIAGONAL portion of the local submatrix of a processor can be defined
2328    as the submatrix which is obtained by extracting the part corresponding
2329    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2330    first row that belongs to the processor, and r2 is the last row belonging
2331    to this processor. This is a square mxm matrix. The remaining portion
2332    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2333 
2334    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2335 
2336    Example usage:
2337 
2338    Consider the following 8x8 matrix with 34 non-zero values, that is
2339    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2340    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2341    as follows:
2342 
2343 .vb
2344             1  2  0  |  0  3  0  |  0  4
2345     Proc0   0  5  6  |  7  0  0  |  8  0
2346             9  0 10  | 11  0  0  | 12  0
2347     -------------------------------------
2348            13  0 14  | 15 16 17  |  0  0
2349     Proc1   0 18  0  | 19 20 21  |  0  0
2350             0  0  0  | 22 23  0  | 24  0
2351     -------------------------------------
2352     Proc2  25 26 27  |  0  0 28  | 29  0
2353            30  0  0  | 31 32 33  |  0 34
2354 .ve
2355 
2356    This can be represented as a collection of submatrices as:
2357 
2358 .vb
2359       A B C
2360       D E F
2361       G H I
2362 .ve
2363 
2364    Where the submatrices A,B,C are owned by proc0, D,E,F are
2365    owned by proc1, G,H,I are owned by proc2.
2366 
2367    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2368    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2369    The 'M','N' parameters are 8,8, and have the same values on all procs.
2370 
2371    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2372    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2373    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2374    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2375    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2376    matrix, and [DF] as another SeqAIJ matrix.
2377 
2378    When d_nz, o_nz parameters are specified, d_nz storage elements are
2379    allocated for every row of the local diagonal submatrix, and o_nz
2380    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2381    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2382    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2383    In this case, the values of d_nz,o_nz are:
2384 .vb
2385      proc0 : dnz = 2, o_nz = 2
2386      proc1 : dnz = 3, o_nz = 2
2387      proc2 : dnz = 1, o_nz = 4
2388 .ve
2389    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2390    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2391    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2392    34 values.
2393 
2394    When d_nnz, o_nnz parameters are specified, the storage is specified
2395    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2396    In the above case the values for d_nnz,o_nnz are:
2397 .vb
2398      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2399      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2400      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2401 .ve
2402    Here the space allocated is the sum of all the above values, i.e. 34, and
2403    hence pre-allocation is perfect.
2404 
2405    Level: intermediate
2406 
2407 .keywords: matrix, aij, compressed row, sparse, parallel
2408 
2409 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2410           MPIAIJ
2411 @*/
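/*
   Example sketch (not part of the original source): the per-row preallocation on
   proc0 of the 8x8 example above, which owns 3 local rows.

     PetscInt d_nnz[] = {2,2,2};
     PetscInt o_nnz[] = {2,2,2};
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);

   The scalar form for the same process would be
   MatMPIAIJSetPreallocation(A,2,PETSC_NULL,2,PETSC_NULL).
*/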
2412 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2413 {
2414   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2415 
2416   PetscFunctionBegin;
2417   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2418   if (f) {
2419     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2420   }
2421   PetscFunctionReturn(0);
2422 }
2423 
2424 #undef __FUNCT__
2425 #define __FUNCT__ "MatCreateMPIAIJ"
2426 /*@C
2427    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2428    (the default parallel PETSc format).  For good matrix assembly performance
2429    the user should preallocate the matrix storage by setting the parameters
2430    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2431    performance can be increased by more than a factor of 50.
2432 
2433    Collective on MPI_Comm
2434 
2435    Input Parameters:
2436 +  comm - MPI communicator
2437 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2438            This value should be the same as the local size used in creating the
2439            y vector for the matrix-vector product y = Ax.
2440 .  n - This value should be the same as the local size used in creating the
2441        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2442        calculated if N is given). For square matrices n is almost always m.
2443 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2444 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2445 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2446            (same value is used for all local rows)
2447 .  d_nnz - array containing the number of nonzeros in the various rows of the
2448            DIAGONAL portion of the local submatrix (possibly different for each row)
2449            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2450            The size of this array is equal to the number of local rows, i.e 'm'.
2451            You must leave room for the diagonal entry even if it is zero.
2452 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2453            submatrix (same value is used for all local rows).
2454 -  o_nnz - array containing the number of nonzeros in the various rows of the
2455            OFF-DIAGONAL portion of the local submatrix (possibly different for
2456            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2457            structure. The size of this array is equal to the number
2458            of local rows, i.e 'm'.
2459 
2460    Output Parameter:
2461 .  A - the matrix
2462 
2463    Notes:
2464    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2465    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2466    storage requirements for this matrix.
2467 
2468    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2469    processor then it must be used on all processors that share the object for
2470    that argument.
2471 
2472    The user MUST specify either the local or global matrix dimensions
2473    (possibly both).
2474 
2475    The parallel matrix is partitioned such that the first m0 rows belong to
2476    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2477    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2478 
2479    The DIAGONAL portion of the local submatrix of a processor can be defined
2480    as the submatrix which is obtained by extracting the part corresponding
2481    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2482    first row that belongs to the processor, and r2 is the last row belonging
2483    to this processor. This is a square mxm matrix. The remaining portion
2484    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2485 
2486    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2487 
2488    When calling this routine with a single process communicator, a matrix of
2489    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2490    type of communicator, use the construction mechanism:
2491      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2492 
2493    By default, this format uses inodes (identical nodes) when possible.
2494    We search for consecutive rows with the same nonzero structure, thereby
2495    reusing matrix information to achieve increased efficiency.
2496 
2497    Options Database Keys:
2498 +  -mat_aij_no_inode  - Do not use inodes
2499 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2500 -  -mat_aij_oneindex - Internally use indexing starting at 1
2501         rather than 0.  Note that when calling MatSetValues(),
2502         the user still MUST index entries starting at 0!
2503 
2504 
2505    Example usage:
2506 
2507    Consider the following 8x8 matrix with 34 non-zero values, that is
2508    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2509    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2510    as follows:
2511 
2512 .vb
2513             1  2  0  |  0  3  0  |  0  4
2514     Proc0   0  5  6  |  7  0  0  |  8  0
2515             9  0 10  | 11  0  0  | 12  0
2516     -------------------------------------
2517            13  0 14  | 15 16 17  |  0  0
2518     Proc1   0 18  0  | 19 20 21  |  0  0
2519             0  0  0  | 22 23  0  | 24  0
2520     -------------------------------------
2521     Proc2  25 26 27  |  0  0 28  | 29  0
2522            30  0  0  | 31 32 33  |  0 34
2523 .ve
2524 
2525    This can be represented as a collection of submatrices as:
2526 
2527 .vb
2528       A B C
2529       D E F
2530       G H I
2531 .ve
2532 
2533    Where the submatrices A,B,C are owned by proc0, D,E,F are
2534    owned by proc1, G,H,I are owned by proc2.
2535 
2536    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2537    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2538    The 'M','N' parameters are 8,8, and have the same values on all procs.
2539 
2540    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2541    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2542    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2543    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2544    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2545    matrix, and [DF] as another SeqAIJ matrix.
2546 
2547    When d_nz, o_nz parameters are specified, d_nz storage elements are
2548    allocated for every row of the local diagonal submatrix, and o_nz
2549    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2550    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2551    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2552    In this case, the values of d_nz,o_nz are:
2553 .vb
2554      proc0 : dnz = 2, o_nz = 2
2555      proc1 : dnz = 3, o_nz = 2
2556      proc2 : dnz = 1, o_nz = 4
2557 .ve
2558    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2559    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2560    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2561    34 values.
2562 
2563    When d_nnz, o_nnz parameters are specified, the storage is specified
2564    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2565    In the above case the values for d_nnz,o_nnz are:
2566 .vb
2567      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2568      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2569      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2570 .ve
2571    Here the space allocated is the sum of all the above values, i.e. 34, and
2572    hence pre-allocation is perfect.
2573 
2574    Level: intermediate
2575 
2576 .keywords: matrix, aij, compressed row, sparse, parallel
2577 
2578 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2579           MPIAIJ
2580 @*/
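/*
   Example sketch (not part of the original source): creating the 8x8 matrix from the
   example above on proc1, which owns 3 local rows and 3 local columns.

     Mat      A;
     PetscInt d_nnz[] = {3,3,2};
     PetscInt o_nnz[] = {2,1,1};
     ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);

   Each process passes its own local sizes and nonzero counts; the global sizes 8,8
   are the same everywhere.
*/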
2581 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2582 {
2583   PetscErrorCode ierr;
2584   PetscMPIInt    size;
2585 
2586   PetscFunctionBegin;
2587   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2588   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2589   if (size > 1) {
2590     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2591     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2592   } else {
2593     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2594     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2595   }
2596   PetscFunctionReturn(0);
2597 }
2598 
2599 #undef __FUNCT__
2600 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2601 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2602 {
2603   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2604 
2605   PetscFunctionBegin;
2606   *Ad     = a->A;
2607   *Ao     = a->B;
2608   *colmap = a->garray;
2609   PetscFunctionReturn(0);
2610 }
2611 
2612 #undef __FUNCT__
2613 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2614 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2615 {
2616   PetscErrorCode ierr;
2617   PetscInt       i;
2618   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2619 
2620   PetscFunctionBegin;
2621   if (coloring->ctype == IS_COLORING_LOCAL) {
2622     ISColoringValue *allcolors,*colors;
2623     ISColoring      ocoloring;
2624 
2625     /* set coloring for diagonal portion */
2626     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2627 
2628     /* set coloring for off-diagonal portion */
2629     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2630     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2631     for (i=0; i<a->B->n; i++) {
2632       colors[i] = allcolors[a->garray[i]];
2633     }
2634     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2635     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2636     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2637     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2638   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2639     ISColoringValue *colors;
2640     PetscInt        *larray;
2641     ISColoring      ocoloring;
2642 
2643     /* set coloring for diagonal portion */
2644     ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2645     for (i=0; i<a->A->n; i++) {
2646       larray[i] = i + a->cstart;
2647     }
2648     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2649     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2650     for (i=0; i<a->A->n; i++) {
2651       colors[i] = coloring->colors[larray[i]];
2652     }
2653     ierr = PetscFree(larray);CHKERRQ(ierr);
2654     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2655     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2656     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2657 
2658     /* set coloring for off-diagonal portion */
2659     ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2660     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2661     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2662     for (i=0; i<a->B->n; i++) {
2663       colors[i] = coloring->colors[larray[i]];
2664     }
2665     ierr = PetscFree(larray);CHKERRQ(ierr);
2666     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2667     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2668     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2669   } else {
2670     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2671   }
2672 
2673   PetscFunctionReturn(0);
2674 }
2675 
2676 #undef __FUNCT__
2677 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2678 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2679 {
2680   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2681   PetscErrorCode ierr;
2682 
2683   PetscFunctionBegin;
2684   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2685   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2686   PetscFunctionReturn(0);
2687 }
2688 
2689 #undef __FUNCT__
2690 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2691 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2692 {
2693   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2694   PetscErrorCode ierr;
2695 
2696   PetscFunctionBegin;
2697   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2698   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2699   PetscFunctionReturn(0);
2700 }
2701 
2702 #undef __FUNCT__
2703 #define __FUNCT__ "MatMerge"
2704 /*@C
2705       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2706                  matrices from each processor
2707 
2708     Collective on MPI_Comm
2709 
2710    Input Parameters:
2711 +    comm - the communicator the parallel matrix will live on
2712 .    inmat - the input sequential matrix (one per process)
2713 .    n - number of local columns (or PETSC_DECIDE)
2714 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2715 
2716    Output Parameter:
2717 .    outmat - the parallel matrix generated
2718 
2719     Level: advanced
2720 
2721    Notes: The number of columns of the matrix in EACH processor MUST be the same.
2722 
2723 @*/
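/*
   Usage sketch (not part of the original source): gluing one sequential matrix per
   process into a single parallel MPIAIJ matrix.  Note that MatMerge() destroys the
   input matrix (see the MatDestroy(inmat) call in the body below).

     Mat seq,par;
     ... assemble seq on each process with the same number of columns everywhere ...
     ierr = MatMerge(PETSC_COMM_WORLD,seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&par);CHKERRQ(ierr);
*/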
2724 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2725 {
2726   PetscErrorCode    ierr;
2727   PetscInt          m,N,i,rstart,nnz,I,*dnz,*onz;
2728   const PetscInt    *indx;
2729   const PetscScalar *values;
2730   PetscMap          columnmap,rowmap;
2731 
2732   PetscFunctionBegin;
2733   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2734   /*
2735   PetscMPIInt       rank;
2736   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2737   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N);
2738   */
2739   if (scall == MAT_INITIAL_MATRIX){
2740     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2741     if (n == PETSC_DECIDE){
2742       ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2743       ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
2744       ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2745       ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2746       ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2747     }
2748 
2749     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2750     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2751     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2752     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2753     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2754 
2755     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2756     for (i=0;i<m;i++) {
2757       ierr = MatGetRow(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2758       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2759       ierr = MatRestoreRow(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2760     }
2761     /* This routine will ONLY return MPIAIJ type matrix */
2762     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2763     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2764     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2765     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2766 
2767   } else if (scall == MAT_REUSE_MATRIX){
2768     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2769   } else {
2770     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2771   }
2772 
2773   for (i=0;i<m;i++) {
2774     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2775     I    = i + rstart;
2776     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2777     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2778   }
2779   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2780   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2781   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2782 
2783   PetscFunctionReturn(0);
2784 }
2785 
2786 #undef __FUNCT__
2787 #define __FUNCT__ "MatFileSplit"
2788 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2789 {
2790   PetscErrorCode    ierr;
2791   PetscMPIInt       rank;
2792   PetscInt          m,N,i,rstart,nnz;
2793   size_t            len;
2794   const PetscInt    *indx;
2795   PetscViewer       out;
2796   char              *name;
2797   Mat               B;
2798   const PetscScalar *values;
2799 
2800   PetscFunctionBegin;
2801   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2802   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2803   /* Should this be the type of the diagonal block of A? */
2804   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2805   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2806   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2807   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2808   for (i=0;i<m;i++) {
2809     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2810     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2811     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2812   }
2813   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2814   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2815 
2816   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2817   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2818   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2819   sprintf(name,"%s.%d",outfile,rank);
2820   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2821   ierr = PetscFree(name);CHKERRQ(ierr);
2822   ierr = MatView(B,out);CHKERRQ(ierr);
2823   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2824   ierr = MatDestroy(B);CHKERRQ(ierr);
2825   PetscFunctionReturn(0);
2826 }
2827 
2828 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2829 #undef __FUNCT__
2830 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2831 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2832 {
2833   PetscErrorCode       ierr;
2834   Mat_Merge_SeqsToMPI  *merge;
2835   PetscObjectContainer container;
2836 
2837   PetscFunctionBegin;
2838   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2839   if (container) {
2840     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2841     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2842     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2843     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2844     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2845     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2846     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2847     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2848     ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2849     if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);}
2850     if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);}
2851     if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);}
2852 
2853     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2854     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2855     ierr = PetscFree(merge);CHKERRQ(ierr);
2856   }
2857 
2858   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2859   PetscFunctionReturn(0);
2860 }
2861 
2862 #include "src/mat/utils/freespace.h"
2863 #include "petscbt.h"
2864 #undef __FUNCT__
2865 #define __FUNCT__ "MatMerge_SeqsToMPI"
2866 /*@C
2867       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
2868                  matrices contributed by each processor
2869 
2870     Collective on MPI_Comm
2871 
2872    Input Parameters:
2873 +    comm - the communicator the parallel matrix will live on
2874 .    seqmat - the input sequential matrix (one per processor)
2875 .    m - number of local rows (or PETSC_DECIDE)
2876 .    n - number of local columns (or PETSC_DECIDE)
2877 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2878 
2879    Output Parameter:
2880 .    mpimat - the parallel matrix generated
2881 
2882     Level: advanced
2883 
2884    Notes:
2885      The dimensions of the sequential matrix MUST be the same on every processor.
2886      The input seqmat is stored in the container "Mat_Merge_SeqsToMPI" and is
2887      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
2888 @*/
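/*
   Example usage (an illustrative sketch; the matrix names are arbitrary).  Every
   process contributes a SeqAIJ matrix with identical global dimensions and the
   contributions are added into one MPIAIJ matrix; a second call with
   MAT_REUSE_MATRIX skips the symbolic phase when only the numerical values of
   seqmat have changed:

       Mat C_seq,C_mpi;

       ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,C_seq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C_mpi);CHKERRQ(ierr);
       ...  update the values of C_seq, keeping its nonzero pattern  ...
       ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,C_seq,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C_mpi);CHKERRQ(ierr);
*/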
2889 static PetscEvent logkey_seqstompinum = 0;
2890 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2891 {
2892   PetscErrorCode       ierr;
2893   MPI_Comm             comm=mpimat->comm;
2894   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2895   PetscMPIInt          size,rank,taga,*len_s;
2896   PetscInt             N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
2897   PetscInt             proc,m;
2898   PetscInt             **buf_ri,**buf_rj;
2899   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2900   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2901   MPI_Request          *s_waits,*r_waits;
2902   MPI_Status           *status;
2903   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2904   Mat_Merge_SeqsToMPI  *merge;
2905   PetscObjectContainer container;
2906 
2907   PetscFunctionBegin;
2908   if (!logkey_seqstompinum) {
2909     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr);
2910   }
2911   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2912 
2913   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2914   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2915 
2916   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2917   if (!container) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat does not have the MatMergeSeqsToMPI container; call MatMerge_SeqsToMPISymbolic() first");
2918   ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2919 
2920   bi     = merge->bi;
2921   bj     = merge->bj;
2922   buf_ri = merge->buf_ri;
2923   buf_rj = merge->buf_rj;
2924 
2925   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2926   ierr   = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2927   len_s  = merge->len_s;
2928 
2929   /* send and recv matrix values */
2930   /*-----------------------------*/
2931   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
2932   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
2933 
2934   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2935   for (proc=0,k=0; proc<size; proc++){
2936     if (!len_s[proc]) continue;
2937     i = owners[proc];
2938     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
2939     k++;
2940   }
2941 
2942   ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
2943   ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
2944   ierr = PetscFree(status);CHKERRQ(ierr);
2945 
2946   ierr = PetscFree(s_waits);CHKERRQ(ierr);
2947   ierr = PetscFree(r_waits);CHKERRQ(ierr);
2948 
2949   /* insert mat values of mpimat */
2950   /*----------------------------*/
2951   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
2952   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
2953   nextrow = buf_ri_k + merge->nrecv;
2954   nextai  = nextrow + merge->nrecv;
2955 
2956   for (k=0; k<merge->nrecv; k++){
2957     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
2958     nrows = *(buf_ri_k[k]);
2959     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
2960     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure  */
2961   }
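  /* Each received i-structure has the layout {nrows, local row indices[0..nrows-1],
     running offsets i[0..nrows]}; it is packed on the sending side in
     MatMerge_SeqsToMPISymbolic() below. */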
2962 
2963   /* set values of ba */
2964   ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
2965   for (i=0; i<m; i++) {
2966     arow = owners[rank] + i;
2967     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
2968     bnzi = bi[i+1] - bi[i];
2969     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
2970 
2971     /* add local non-zero vals of this proc's seqmat into ba */
2972     anzi = ai[arow+1] - ai[arow];
2973     aj   = a->j + ai[arow];
2974     aa   = a->a + ai[arow];
2975     nextaj = 0;
2976     for (j=0; nextaj<anzi; j++){
2977       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2978         ba_i[j] += aa[nextaj++];
2979       }
2980     }
2981 
2982     /* add received vals into ba */
2983     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
2984       /* i-th row */
2985       if (i == *nextrow[k]) {
2986         anzi = *(nextai[k]+1) - *nextai[k];
2987         aj   = buf_rj[k] + *(nextai[k]);
2988         aa   = abuf_r[k] + *(nextai[k]);
2989         nextaj = 0;
2990         for (j=0; nextaj<anzi; j++){
2991           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2992             ba_i[j] += aa[nextaj++];
2993           }
2994         }
2995         nextrow[k]++; nextai[k]++;
2996       }
2997     }
2998     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
2999   }
3000   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3001   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3002 
3003   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3004   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3005   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3006   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3007   PetscFunctionReturn(0);
3008 }
3009 static PetscEvent logkey_seqstompisym = 0;
3010 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3011 {
3012   PetscErrorCode       ierr;
3013   Mat                  B_mpi;
3014   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3015   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3016   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3017   PetscInt             M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
3018   PetscInt             len,proc,*dnz,*onz;
3019   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3020   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3021   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3022   MPI_Status           *status;
3023   FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
3024   PetscBT              lnkbt;
3025   Mat_Merge_SeqsToMPI  *merge;
3026   PetscObjectContainer container;
3027 
3028   PetscFunctionBegin;
3029   if (!logkey_seqstompisym) {
3030     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr);
3031   }
3032   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3033 
3034   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3035   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3036 
3037   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3038   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3039 
3040   /* determine row ownership */
3041   /*---------------------------------------------------------*/
3042   ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
3043   if (m == PETSC_DECIDE) {
3044     ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
3045   } else {
3046     ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
3047   }
3048   ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
3049   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3050   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3051 
3052   if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); }
3053   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
3054 
3055   /* determine the number of messages to send, their lengths */
3056   /*---------------------------------------------------------*/
3057   len_s  = merge->len_s;
3058 
3059   len = 0;  /* length of buf_si[] */
3060   merge->nsend = 0;
3061   for (proc=0; proc<size; proc++){
3062     len_si[proc] = 0;
3063     if (proc == rank){
3064       len_s[proc] = 0;
3065     } else {
3066       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3067       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3068     }
3069     if (len_s[proc]) {
3070       merge->nsend++;
3071       nrows = 0;
3072       for (i=owners[proc]; i<owners[proc+1]; i++){
3073         if (ai[i+1] > ai[i]) nrows++;
3074       }
3075       len_si[proc] = 2*(nrows+1);
3076       len += len_si[proc];
3077     }
3078   }
3079 
3080   /* determine the number and length of messages to receive for ij-structure */
3081   /*-------------------------------------------------------------------------*/
3082   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3083   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3084 
3085   /* post the Irecv of j-structure */
3086   /*-------------------------------*/
3087   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
3088   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3089 
3090   /* post the Isend of j-structure */
3091   /*--------------------------------*/
3092   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3093   sj_waits = si_waits + merge->nsend;
3094 
3095   for (proc=0, k=0; proc<size; proc++){
3096     if (!len_s[proc]) continue;
3097     i = owners[proc];
3098     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3099     k++;
3100   }
3101 
3102   /* receives and sends of j-structure are complete */
3103   /*------------------------------------------------*/
3104   ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
3105   ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);
3106 
3107   /* send and recv i-structure */
3108   /*---------------------------*/
3109   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
3110   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3111 
3112   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3113   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3114   for (proc=0,k=0; proc<size; proc++){
3115     if (!len_s[proc]) continue;
3116     /* form outgoing message for i-structure:
3117          buf_si[0]:                 nrows to be sent
3118                [1:nrows]:           row index (global)
3119                [nrows+1:2*nrows+1]: i-structure index
3120     */
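    /* For instance, if this process has nonzeros in only two of the rows owned by
       [proc], at local (to [proc]) indices 3 and 7 with 4 and 2 nonzeros respectively,
       then len_si[proc] = 6 and buf_si = {2, 3, 7, 0, 4, 6}: nrows, the two row
       indices, then the running i-structure offsets. */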
3121     /*-------------------------------------------*/
3122     nrows = len_si[proc]/2 - 1;
3123     buf_si_i    = buf_si + nrows+1;
3124     buf_si[0]   = nrows;
3125     buf_si_i[0] = 0;
3126     nrows = 0;
3127     for (i=owners[proc]; i<owners[proc+1]; i++){
3128       anzi = ai[i+1] - ai[i];
3129       if (anzi) {
3130         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3131         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3132         nrows++;
3133       }
3134     }
3135     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3136     k++;
3137     buf_si += len_si[proc];
3138   }
3139 
3140   ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
3141   ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);
3142 
3143   ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3144   for (i=0; i<merge->nrecv; i++){
3145     ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI:   recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3146   }
3147 
3148   ierr = PetscFree(len_si);CHKERRQ(ierr);
3149   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3150   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3151   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3152   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3153   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3154   ierr = PetscFree(status);CHKERRQ(ierr);
3155 
3156   /* compute a local seq matrix in each processor */
3157   /*----------------------------------------------*/
3158   /* allocate bi array and free space for accumulating nonzero column info */
3159   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3160   bi[0] = 0;
3161 
3162   /* create and initialize a linked list */
3163   nlnk = N+1;
3164   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3165 
3166   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3167   len = 0;
3168   len  = ai[owners[rank+1]] - ai[owners[rank]];
3169   ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3170   current_space = free_space;
3171 
3172   /* determine symbolic info for each local row */
3173   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3174   nextrow = buf_ri_k + merge->nrecv;
3175   nextai  = nextrow + merge->nrecv;
3176   for (k=0; k<merge->nrecv; k++){
3177     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3178     nrows = *buf_ri_k[k];
3179     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
3180     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure  */
3181   }
3182 
3183   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3184   len = 0;
3185   for (i=0;i<m;i++) {
3186     bnzi   = 0;
3187     /* add local non-zero cols of this proc's seqmat into lnk */
3188     arow   = owners[rank] + i;
3189     anzi   = ai[arow+1] - ai[arow];
3190     aj     = a->j + ai[arow];
3191     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3192     bnzi += nlnk;
3193     /* add received col data into lnk */
3194     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3195       if (i == *nextrow[k]) { /* i-th row */
3196         anzi = *(nextai[k]+1) - *nextai[k];
3197         aj   = buf_rj[k] + *nextai[k];
3198         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3199         bnzi += nlnk;
3200         nextrow[k]++; nextai[k]++;
3201       }
3202     }
3203     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3204 
3205     /* if free space is not available, make more free space */
3206     if (current_space->local_remaining<bnzi) {
3207       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3208       nspacedouble++;
3209     }
3210     /* copy data into free space, then initialize lnk */
3211     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3212     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3213 
3214     current_space->array           += bnzi;
3215     current_space->local_used      += bnzi;
3216     current_space->local_remaining -= bnzi;
3217 
3218     bi[i+1] = bi[i] + bnzi;
3219   }
3220 
3221   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3222 
3223   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3224   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3225   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3226 
3227   /* create symbolic parallel matrix B_mpi */
3228   /*---------------------------------------*/
3229   if (n==PETSC_DECIDE) {
3230     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr);
3231   } else {
3232     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
3233   }
3234   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3235   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3236   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3237 
3238   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3239   B_mpi->assembled     = PETSC_FALSE;
3240   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3241   merge->bi            = bi;
3242   merge->bj            = bj;
3243   merge->buf_ri        = buf_ri;
3244   merge->buf_rj        = buf_rj;
3245   merge->coi           = PETSC_NULL;
3246   merge->coj           = PETSC_NULL;
3247   merge->owners_co     = PETSC_NULL;
3248 
3249   /* attach the supporting struct to B_mpi for reuse */
3250   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3251   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3252   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3253   *mpimat = B_mpi;
3254   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3255   PetscFunctionReturn(0);
3256 }
3257 
3258 static PetscEvent logkey_seqstompi = 0;
3259 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3260 {
3261   PetscErrorCode   ierr;
3262 
3263   PetscFunctionBegin;
3264   if (!logkey_seqstompi) {
3265     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
3266   }
3267   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3268   if (scall == MAT_INITIAL_MATRIX){
3269     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3270   }
3271   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3272   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3273   PetscFunctionReturn(0);
3274 }
3275 static PetscEvent logkey_getlocalmat = 0;
3276 #undef __FUNCT__
3277 #define __FUNCT__ "MatGetLocalMat"
3278 /*@C
3279      MatGetLocalMat - Creates a SeqAIJ matrix from all the local rows (both the diagonal and off-diagonal parts) of a parallel MPIAIJ matrix
3280 
3281     Not Collective
3282 
3283    Input Parameters:
3284 +    A - the matrix
3285 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3286 
3287    Output Parameter:
3288 .    A_loc - the local sequential matrix generated
3289 
3290     Level: developer
3291 
3292 @*/
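/*
   Example usage (an illustrative sketch; the matrix names are arbitrary).  The
   local rows of a parallel MPIAIJ matrix A are gathered into a sequential matrix,
   and the copy is refreshed after the values of A change:

       Mat A_loc;

       ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
       ...  modify the values of A, keeping its nonzero pattern  ...
       ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
*/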
3293 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3294 {
3295   PetscErrorCode  ierr;
3296   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3297   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3298   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3299   MatScalar       *aa=a->a,*ba=b->a,*ca;
3300   PetscInt        am=A->m,i,j,k,*nnz,nnz_max,ncols,cstart=mpimat->cstart;
3301   Mat             Aw;
3302   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3303   PetscInt        stages[2];
3304 
3305   PetscFunctionBegin;
3306   ierr = PetscLogStageRegister(&stages[0],"LocalMat_init");CHKERRQ(ierr);
3307   ierr = PetscLogStageRegister(&stages[1],"LocalMat_reuse");CHKERRQ(ierr);
3308 
3309   if (!logkey_getlocalmat) {
3310     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
3311   }
3312   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3313   if (scall == MAT_INITIAL_MATRIX){
3314     ierr = PetscLogStagePush(stages[0]);CHKERRQ(ierr);
3315     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
3316     nnz_max = 0;
3317     for (i=0; i<am; i++){
3318       nnz[i] = (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3319       if (nnz[i] > nnz_max) nnz_max = nnz[i];
3320     }
3321     ierr = MatCreate(PETSC_COMM_SELF,am,A->N,am,A->N,&Aw);CHKERRQ(ierr);
3322     ierr = MatSetType(Aw,MATSEQAIJ);CHKERRQ(ierr);
3323     ierr = MatSeqAIJSetPreallocation(Aw,0,nnz);CHKERRQ(ierr);
3324     ierr = MatSetOption(Aw,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
3325 
3326     ierr = PetscMalloc((1+nnz_max)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3327     ierr = PetscMalloc((1+nnz_max)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
3328     for (i=0; i<am; i++) {
3329       ncols_o = bi[i+1] - bi[i];
3330       ncols_d = ai[i+1] - ai[i];
3331       ncols   = ncols_d + ncols_o;
3332       k = 0;
3333       /* off-diagonal portion of A */
3334       for (jo=0; jo<ncols_o; jo++) {
3335         col = cmap[*bj];
3336         if (col >= cstart) break;
3337         cj[k]   = col; bj++;
3338         ca[k++] = *ba++;
3339       }
3340       /* diagonal portion of A */
3341       for (j=0; j<ncols_d; j++) {
3342         cj[k]   = cstart + *aj++;
3343         ca[k++] = *aa++;
3344       }
3345       /* off-diagonal portion of A */
3346       for (j=jo;j<ncols_o; j++) {
3347         cj[k]   = cmap[*bj++];
3348         ca[k++] = *ba++;
3349       }
3350       ierr = MatSetValues_SeqAIJ(Aw,1,&i,ncols,cj,ca,INSERT_VALUES);CHKERRQ(ierr);
3351     }
3352     ierr = MatAssemblyBegin(Aw,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3353     ierr = MatAssemblyEnd(Aw,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3354     ierr = PetscFree(cj);CHKERRQ(ierr);
3355     ierr = PetscFree(ca);CHKERRQ(ierr);
3356     ierr = PetscFree(nnz);CHKERRQ(ierr);
3357     *A_loc = Aw;
3358     ierr = PetscLogStagePop();CHKERRQ(ierr);
3359   } else if (scall == MAT_REUSE_MATRIX){
3360     ierr = PetscLogStagePush(stages[1]);CHKERRQ(ierr);
3361     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3362     ci = mat->i; cj = mat->j; ca = mat->a;
3363     for (i=0; i<am; i++) {
3364       /* off-diagonal portion of A */
3365       ncols_o = bi[i+1] - bi[i];
3366       for (jo=0; jo<ncols_o; jo++) {
3367         col = cmap[*bj];
3368         if (col >= cstart) break;
3369         *ca++ = *ba++;
3370       }
3371       /* diagonal portion of A */
3372       ncols = ai[i+1] - ai[i];
3373       for (j=0; j<ncols; j++) *ca++ = *aa++;
3374       /* off-diagonal portion of A */
3375       for (j=jo;j<ncols_o; j++) *ca++ = *ba++;
3376     }
3377     ierr = PetscLogStagePop();CHKERRQ(ierr);
3378   } else {
3379     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3380   }
3381 
3382   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3383   PetscFunctionReturn(0);
3384 }
3385 
3386 static PetscEvent logkey_getlocalmatcondensed = 0;
3387 #undef __FUNCT__
3388 #define __FUNCT__ "MatGetLocalMatCondensed"
3389 /*@C
3390      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows of a parallel MPIAIJ matrix, keeping only the NON-ZERO columns
3391 
3392     Not Collective
3393 
3394    Input Parameters:
3395 +    A - the matrix
3396 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3397 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3398 
3399    Output Parameter:
3400 .    A_loc - the local sequential matrix generated
3401 
3402     Level: developer
3403 
3404 @*/
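/*
   Example usage (an illustrative sketch; the matrix names are arbitrary).  Passing
   PETSC_NULL for row and col lets the routine select all local rows together with
   the columns of the diagonal block and the nonzero off-diagonal columns:

       Mat A_loc;

       ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
*/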
3405 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3406 {
3407   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3408   PetscErrorCode    ierr;
3409   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3410   IS                isrowa,iscola;
3411   Mat               *aloc;
3412 
3413   PetscFunctionBegin;
3414   if (!logkey_getlocalmatcondensed) {
3415     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
3416   }
3417   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3418   if (!row){
3419     start = a->rstart; end = a->rend;
3420     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3421   } else {
3422     isrowa = *row;
3423   }
3424   if (!col){
3425     start = a->cstart;
3426     cmap  = a->garray;
3427     nzA   = a->A->n;
3428     nzB   = a->B->n;
3429     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3430     ncols = 0;
3431     for (i=0; i<nzB; i++) {
3432       if (cmap[i] < start) idx[ncols++] = cmap[i];
3433       else break;
3434     }
3435     imark = i;
3436     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3437     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3438     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3439     ierr = PetscFree(idx);CHKERRQ(ierr);
3440   } else {
3441     iscola = *col;
3442   }
3443   if (scall != MAT_INITIAL_MATRIX){
3444     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3445     aloc[0] = *A_loc;
3446   }
3447   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3448   *A_loc = aloc[0];
3449   ierr = PetscFree(aloc);CHKERRQ(ierr);
3450   if (!row){
3451     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3452   }
3453   if (!col){
3454     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3455   }
3456   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3457   PetscFunctionReturn(0);
3458 }
3459 
3460 static PetscEvent logkey_GetBrowsOfAcols = 0;
3461 #undef __FUNCT__
3462 #define __FUNCT__ "MatGetBrowsOfAcols"
3463 /*@C
3464     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
3465 
3466     Collective on Mat
3467 
3468    Input Parameters:
3469 +    A,B - the matrices in mpiaij format
3470 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3471 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3472 
3473    Output Parameter:
3474 +    rowb, colb - index sets of rows and columns of B to extract
3475 .    brstart - index of the first row of B_seq that is taken from B's own local rows
3476 -    B_seq - the sequential matrix generated
3477 
3478     Level: developer
3479 
3480 @*/
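/*
   Example usage (an illustrative sketch; the names are arbitrary).  Used, for
   example, when forming a product with B: the rows of B matching the nonzero
   columns of the local part of A are pulled into a sequential matrix, and the
   index sets created by the first call are passed back for reuse:

       Mat      B_seq;
       IS       rowb,colb;
       PetscInt brstart;

       ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
       ...  update the values of B, keeping its nonzero pattern  ...
       ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
*/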
3481 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3482 {
3483   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3484   PetscErrorCode    ierr;
3485   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3486   IS                isrowb,iscolb;
3487   Mat               *bseq;
3488 
3489   PetscFunctionBegin;
3490   if (a->cstart != b->rstart || a->cend != b->rend){
3491     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3492   }
3493   if (!logkey_GetBrowsOfAcols) {
3494     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
3495   }
3496   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3497 
3498   if (scall == MAT_INITIAL_MATRIX){
3499     start = a->cstart;
3500     cmap  = a->garray;
3501     nzA   = a->A->n;
3502     nzB   = a->B->n;
3503     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3504     ncols = 0;
3505     for (i=0; i<nzB; i++) {  /* row < local row index */
3506       if (cmap[i] < start) idx[ncols++] = cmap[i];
3507       else break;
3508     }
3509     imark = i;
3510     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3511     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3512     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3513     ierr = PetscFree(idx);CHKERRQ(ierr);
3514     *brstart = imark;
3515     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3516   } else {
3517     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3518     isrowb = *rowb; iscolb = *colb;
3519     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3520     bseq[0] = *B_seq;
3521   }
3522   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3523   *B_seq = bseq[0];
3524   ierr = PetscFree(bseq);CHKERRQ(ierr);
3525   if (!rowb){
3526     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3527   } else {
3528     *rowb = isrowb;
3529   }
3530   if (!colb){
3531     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3532   } else {
3533     *colb = iscolb;
3534   }
3535   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3536   PetscFunctionReturn(0);
3537 }
3538 
3539 static PetscEvent logkey_GetBrowsOfAocols = 0;
3540 #undef __FUNCT__
3541 #define __FUNCT__ "MatGetBrowsOfAoCols"
3542 /*@C
3543     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero
3544     columns of the OFF-DIAGONAL portion of the local part of A
3545 
3546     Collective on Mat
3547 
3548    Input Parameters:
3549 +    A,B - the matrices in mpiaij format
3550 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3551 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3552 
3553    Output Parameter:
3554 +    rowb, colb - index sets of rows and columns of B to extract
3555 .    brstart - index of the first row of B_seq that corresponds to an off-diagonal column of A at or beyond cend
3556 -    B_seq - the sequential matrix generated
3557 
3558     Level: developer
3559 
3560 @*/
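/*
   Example usage (an illustrative sketch; the names are arbitrary).  Same calling
   sequence as MatGetBrowsOfAcols() above, but only the rows of B matching the
   nonzero OFF-DIAGONAL columns of local A are fetched; when the off-diagonal
   block of A is empty no sequential matrix is created:

       ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
*/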
3561 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3562 {
3563   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3564   PetscErrorCode    ierr;
3565   PetscInt          *idx,i,start,ncols,nzB,*cmap;
3566   IS                isrowb,iscolb;
3567   Mat               *bseq;
3568 
3569   PetscFunctionBegin;
3570   if (a->cstart != b->rstart || a->cend != b->rend){
3571     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3572   }
3573   if (!logkey_GetBrowsOfAocols) {
3574     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
3575   }
3576   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3577 
3578   if (scall == MAT_INITIAL_MATRIX){
3579     start = a->cstart;
3580     cmap  = a->garray;
3581     nzB   = a->B->n;
3582     if (!nzB){ /* output a null matrix */
3583       *B_seq = PETSC_NULL;
3584       ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3585       PetscFunctionReturn(0);
3586     }
3587     ierr  = PetscMalloc(nzB*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3588     ncols = 0;
3589     *brstart = nzB;
3590     for (i=0; i<nzB; i++) {  /* row < local row index */
3591       if (cmap[i] < start) idx[ncols++] = cmap[i];
3592       else break;
3593     }
3594     *brstart = i;
3595     for (i=*brstart; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3596     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3597     ierr = PetscFree(idx);CHKERRQ(ierr);
3598 
3599     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3600   } else {
3601     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3602     isrowb = *rowb; iscolb = *colb;
3603     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3604     bseq[0] = *B_seq;
3605   }
3606   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3607   *B_seq = bseq[0];
3608   ierr = PetscFree(bseq);CHKERRQ(ierr);
3609   if (!rowb){
3610     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3611   } else {
3612     *rowb = isrowb;
3613   }
3614   if (!colb){
3615     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3616   } else {
3617     *colb = iscolb;
3618   }
3619   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3620   PetscFunctionReturn(0);
3621 }
3622 
3623 /*MC
3624    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3625 
3626    Options Database Keys:
3627 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3628 
3629   Level: beginner
3630 
3631 .seealso: MatCreateMPIAIJ
3632 M*/
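/*
   Example usage (an illustrative sketch; M and N are whatever global sizes the
   application chooses):

       Mat A;

       ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
       ierr = MatSetFromOptions(A);CHKERRQ(ierr);

   run with -mat_type mpiaij, or call MatSetType(A,MATMPIAIJ) directly.
*/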
3633 
3634 EXTERN_C_BEGIN
3635 #undef __FUNCT__
3636 #define __FUNCT__ "MatCreate_MPIAIJ"
3637 PetscErrorCode MatCreate_MPIAIJ(Mat B)
3638 {
3639   Mat_MPIAIJ     *b;
3640   PetscErrorCode ierr;
3641   PetscInt       i;
3642   PetscMPIInt    size;
3643 
3644   PetscFunctionBegin;
3645   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3646 
3647   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3648   B->data         = (void*)b;
3649   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
3650   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3651   B->factor       = 0;
3652   B->assembled    = PETSC_FALSE;
3653   B->mapping      = 0;
3654 
3655   B->insertmode      = NOT_SET_VALUES;
3656   b->size            = size;
3657   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3658 
3659   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
3660   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
3661 
3662   /* the information in the maps duplicates the information computed below, eventually
3663      we should remove the duplicate information that is not contained in the maps */
3664   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
3665   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
3666 
3667   /* build local table of row and column ownerships */
3668   ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
3669   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
3670   b->cowners = b->rowners + b->size + 2;
3671   ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3672   b->rowners[0] = 0;
3673   for (i=2; i<=b->size; i++) {
3674     b->rowners[i] += b->rowners[i-1];
3675   }
3676   b->rstart = b->rowners[b->rank];
3677   b->rend   = b->rowners[b->rank+1];
3678   ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3679   b->cowners[0] = 0;
3680   for (i=2; i<=b->size; i++) {
3681     b->cowners[i] += b->cowners[i-1];
3682   }
3683   b->cstart = b->cowners[b->rank];
3684   b->cend   = b->cowners[b->rank+1];
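  /* For instance, with three processes owning 4, 3, and 5 rows, the Allgather and
     running sum above give b->rowners[0..size] = {0,4,7,12}, so process 1 gets
     rstart = 4 and rend = 7; cowners is built the same way for the columns. */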
3685 
3686   /* build cache for off array entries formed */
3687   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3688   b->donotstash  = PETSC_FALSE;
3689   b->colmap      = 0;
3690   b->garray      = 0;
3691   b->roworiented = PETSC_TRUE;
3692 
3693   /* stuff used for matrix vector multiply */
3694   b->lvec      = PETSC_NULL;
3695   b->Mvctx     = PETSC_NULL;
3696 
3697   /* stuff for MatGetRow() */
3698   b->rowindices   = 0;
3699   b->rowvalues    = 0;
3700   b->getrowactive = PETSC_FALSE;
3701 
3702   /* Explicitly create 2 MATSEQAIJ matrices. */
3703   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
3704   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3705   PetscLogObjectParent(B,b->A);
3706   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
3707   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3708   PetscLogObjectParent(B,b->B);
3709 
3710   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3711                                      "MatStoreValues_MPIAIJ",
3712                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3713   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3714                                      "MatRetrieveValues_MPIAIJ",
3715                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3716   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3717 				     "MatGetDiagonalBlock_MPIAIJ",
3718                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3719   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3720 				     "MatIsTranspose_MPIAIJ",
3721 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3722   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3723 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3724 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3725   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3726 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3727 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
3728   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3729 				     "MatDiagonalScaleLocal_MPIAIJ",
3730 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
3731   PetscFunctionReturn(0);
3732 }
3733 EXTERN_C_END
3734