xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision ee45ca4afdde1af4d1deda7cd4dc1a4a63a3ea97)
1 
2 #include "src/mat/impls/aij/mpi/mpiaij.h"
3 #include "src/inline/spops.h"
4 
5 /*
6   Local utility routine that creates a mapping from the global column
7 number to the local number in the off-diagonal part of the local
8 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable (at
9 a slightly higher hash-table lookup cost); without it, it is not scalable
10 (each process stores an order-N integer array) but access is fast.
11 */
12 #undef __FUNCT__
13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
15 {
16   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
17   PetscErrorCode ierr;
18   PetscInt       n = aij->B->n,i;
19 
20   PetscFunctionBegin;
21 #if defined (PETSC_USE_CTABLE)
22   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
23   for (i=0; i<n; i++){
24     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
25   }
26 #else
27   ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
28   PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));
29   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
30   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
31 #endif
32   PetscFunctionReturn(0);
33 }
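/*
   Usage sketch (illustrative, not part of the original source): once the
   colmap exists, a global column index gcol is translated to a local
   off-diagonal index exactly as MatSetValues_MPIAIJ() does below; a result
   of -1 means gcol does not occur in the off-diagonal part.

   #if defined (PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
       lcol--;                          keys and values are stored shifted by 1
   #else
       lcol = aij->colmap[gcol] - 1;    0 in the array means "not present"
   #endif
*/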
34 
35 #define CHUNKSIZE   15
36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
37 { \
38  \
39     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
40     rmax = aimax[row]; nrow = ailen[row];  \
41     col1 = col - shift; \
42      \
43     low = 0; high = nrow; \
44     while (high-low > 5) { \
45       t = (low+high)/2; \
46       if (rp[t] > col1) high = t; \
47       else             low  = t; \
48     } \
49       for (_i=low; _i<high; _i++) { \
50         if (rp[_i] > col1) break; \
51         if (rp[_i] == col1) { \
52           if (addv == ADD_VALUES) ap[_i] += value;   \
53           else                    ap[_i] = value; \
54           goto a_noinsert; \
55         } \
56       }  \
57       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
58       if (nonew == 1) goto a_noinsert; \
59       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
60       if (nrow >= rmax) { \
61         /* there is no extra room in row, therefore enlarge */ \
62         PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
63         PetscScalar *new_a; \
64  \
65         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
66  \
67         /* malloc new storage space */ \
68         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
69         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
70         new_j   = (PetscInt*)(new_a + new_nz); \
71         new_i   = new_j + new_nz; \
72  \
73         /* copy over old data into new slots */ \
74         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
75         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
76         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
77         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
78         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
79                                                            len*sizeof(PetscInt));CHKERRQ(ierr); \
80         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
81         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
82                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
83         /* free up old matrix storage */ \
84  \
85         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
86         if (!a->singlemalloc) { \
87            ierr = PetscFree(a->i);CHKERRQ(ierr); \
88            ierr = PetscFree(a->j);CHKERRQ(ierr); \
89         } \
90         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
91         a->singlemalloc = PETSC_TRUE; \
92  \
93         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
94         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
95         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
96         a->maxnz += CHUNKSIZE; \
97         a->reallocs++; \
98       } \
99       N = nrow++ - 1; a->nz++; \
100       /* shift up all the later entries in this row */ \
101       for (ii=N; ii>=_i; ii--) { \
102         rp[ii+1] = rp[ii]; \
103         ap[ii+1] = ap[ii]; \
104       } \
105       rp[_i] = col1;  \
106       ap[_i] = value;  \
107       a_noinsert: ; \
108       ailen[row] = nrow; \
109 }
110 
111 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
112 { \
113  \
114     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
115     rmax = bimax[row]; nrow = bilen[row];  \
116     col1 = col - shift; \
117      \
118     low = 0; high = nrow; \
119     while (high-low > 5) { \
120       t = (low+high)/2; \
121       if (rp[t] > col1) high = t; \
122       else             low  = t; \
123     } \
124        for (_i=low; _i<high; _i++) { \
125         if (rp[_i] > col1) break; \
126         if (rp[_i] == col1) { \
127           if (addv == ADD_VALUES) ap[_i] += value;   \
128           else                    ap[_i] = value; \
129           goto b_noinsert; \
130         } \
131       }  \
132       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
133       if (nonew == 1) goto b_noinsert; \
134       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
135       if (nrow >= rmax) { \
136         /* there is no extra room in row, therefore enlarge */ \
137         PetscInt    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
138         PetscScalar *new_a; \
139  \
140         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
141  \
142         /* malloc new storage space */ \
143         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \
144         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
145         new_j   = (PetscInt*)(new_a + new_nz); \
146         new_i   = new_j + new_nz; \
147  \
148         /* copy over old data into new slots */ \
149         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
150         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
151         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
152         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
153         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
154                                                            len*sizeof(PetscInt));CHKERRQ(ierr); \
155         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
156         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
157                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
158         /* free up old matrix storage */ \
159  \
160         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
161         if (!b->singlemalloc) { \
162           ierr = PetscFree(b->i);CHKERRQ(ierr); \
163           ierr = PetscFree(b->j);CHKERRQ(ierr); \
164         } \
165         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
166         b->singlemalloc = PETSC_TRUE; \
167  \
168         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
169         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
170         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
171         b->maxnz += CHUNKSIZE; \
172         b->reallocs++; \
173       } \
174       N = nrow++ - 1; b->nz++; \
175       /* shift up all the later entries in this row */ \
176       for (ii=N; ii>=_i; ii--) { \
177         rp[ii+1] = rp[ii]; \
178         ap[ii+1] = ap[ii]; \
179       } \
180       rp[_i] = col1;  \
181       ap[_i] = value;  \
182       b_noinsert: ; \
183       bilen[row] = nrow; \
184 }
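/*
   The two macros above are identical except for which sequential block they
   touch (A = diagonal part, B = off-diagonal part).  A minimal free-standing
   sketch of the shared search-and-insert step, assuming the row still has a
   spare slot (illustrative only, not part of the original source):

       static void RowInsert(PetscInt *rp,PetscScalar *ap,PetscInt *nrow,
                             PetscInt col,PetscScalar value,InsertMode addv)
       {
         PetscInt i,k,low = 0,high = *nrow,t;
         while (high-low > 5) {             bisect, then scan the short span
           t = (low+high)/2;
           if (rp[t] > col) high = t; else low = t;
         }
         for (i=low; i<high; i++) {
           if (rp[i] > col) break;
           if (rp[i] == col) {              existing entry: combine and return
             if (addv == ADD_VALUES) ap[i] += value; else ap[i] = value;
             return;
           }
         }
         for (k=*nrow-1; k>=i; k--) {       shift the tail up one slot
           rp[k+1] = rp[k]; ap[k+1] = ap[k];
         }
         rp[i] = col; ap[i] = value; (*nrow)++;
       }

   When the row is full (nrow >= rmax) the macros instead malloc a new
   (i,j,a) triple with CHUNKSIZE extra slots for this row and copy the whole
   local block across; avoiding that path is the point of preallocation.
*/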
185 
186 #undef __FUNCT__
187 #define __FUNCT__ "MatSetValues_MPIAIJ"
188 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
189 {
190   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
191   PetscScalar    value;
192   PetscErrorCode ierr;
193   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
194   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
195   PetscTruth     roworiented = aij->roworiented;
196 
197   /* Some variables required by the macros */
198   Mat            A = aij->A;
199   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
200   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
201   PetscScalar    *aa = a->a;
202   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
203   Mat            B = aij->B;
204   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
205   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
206   PetscScalar    *ba = b->a;
207 
208   PetscInt       *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
209   PetscInt       nonew = a->nonew,shift=0;
210   PetscScalar    *ap;
211 
212   PetscFunctionBegin;
213   for (i=0; i<m; i++) {
214     if (im[i] < 0) continue;
215 #if defined(PETSC_USE_DEBUG)
216     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
217 #endif
218     if (im[i] >= rstart && im[i] < rend) {
219       row = im[i] - rstart;
220       for (j=0; j<n; j++) {
221         if (in[j] >= cstart && in[j] < cend){
222           col = in[j] - cstart;
223           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
224           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
225           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
226           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
227         } else if (in[j] < 0) continue;
228 #if defined(PETSC_USE_DEBUG)
229         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
230 #endif
231         else {
232           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
233           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
234           if (mat->was_assembled) {
235             if (!aij->colmap) {
236               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
237             }
238 #if defined (PETSC_USE_CTABLE)
239             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
240             col--;
241 #else
242             col = aij->colmap[in[j]] - 1;
243 #endif
244             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
245               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
246               col =  in[j];
247               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
248               B = aij->B;
249               b = (Mat_SeqAIJ*)B->data;
250               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
251               ba = b->a;
252             }
253           } else col = in[j];
254           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
255           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
256         }
257       }
258     } else {
259       if (!aij->donotstash) {
260         if (roworiented) {
261           if (ignorezeroentries && v[i*n] == 0.0) continue;
262           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
263         } else {
264           if (ignorezeroentries && v[i] == 0.0) continue;
265           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
266         }
267       }
268     }
269   }
270   PetscFunctionReturn(0);
271 }
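/*
   Caller-side sketch (illustrative, not part of the original source):
   locally owned rows are written directly into aij->A or aij->B above,
   while rows owned by another process are queued in mat->stash and
   communicated during assembly.

       PetscInt    row = 0,col = 0;       any global indices; row need not be local
       PetscScalar v   = 1.0;
       ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/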
272 
273 #undef __FUNCT__
274 #define __FUNCT__ "MatGetValues_MPIAIJ"
275 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
276 {
277   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
278   PetscErrorCode ierr;
279   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
280   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
281 
282   PetscFunctionBegin;
283   for (i=0; i<m; i++) {
284     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
285     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
286     if (idxm[i] >= rstart && idxm[i] < rend) {
287       row = idxm[i] - rstart;
288       for (j=0; j<n; j++) {
289         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
290         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
291         if (idxn[j] >= cstart && idxn[j] < cend){
292           col = idxn[j] - cstart;
293           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
294         } else {
295           if (!aij->colmap) {
296             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
297           }
298 #if defined (PETSC_USE_CTABLE)
299           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
300           col--;
301 #else
302           col = aij->colmap[idxn[j]] - 1;
303 #endif
304           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
305           else {
306             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
307           }
308         }
309       }
310     } else {
311       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
312     }
313   }
314   PetscFunctionReturn(0);
315 }
316 
317 #undef __FUNCT__
318 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
319 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
320 {
321   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
322   PetscErrorCode ierr;
323   PetscInt       nstash,reallocs;
324   InsertMode     addv;
325 
326   PetscFunctionBegin;
327   if (aij->donotstash) {
328     PetscFunctionReturn(0);
329   }
330 
331   /* make sure all processors are in the same mode, either INSERT_VALUES or ADD_VALUES */
332   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
333   if (addv == (ADD_VALUES|INSERT_VALUES)) {
334     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
335   }
336   mat->insertmode = addv; /* in case this processor had no cache */
337 
338   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
339   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
340   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
341   PetscFunctionReturn(0);
342 }
343 
344 #undef __FUNCT__
345 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
346 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
347 {
348   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
349   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
350   PetscErrorCode ierr;
351   PetscMPIInt    n;
352   PetscInt       i,j,rstart,ncols,flg;
353   PetscInt       *row,*col,other_disassembled;
354   PetscScalar    *val;
355   InsertMode     addv = mat->insertmode;
356 
357   PetscFunctionBegin;
358   if (!aij->donotstash) {
359     while (1) {
360       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
361       if (!flg) break;
362 
363       for (i=0; i<n;) {
364         /* Now identify the consecutive vals belonging to the same row */
365         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
366         if (j < n) ncols = j-i;
367         else       ncols = n-i;
368         /* Now assemble all these values with a single function call */
369         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
370         i = j;
371       }
372     }
373     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
374   }
375   a->compressedrow.use     = PETSC_FALSE;
376   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
377   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
378 
379   /* determine if any processor has disassembled, if so we must
380      also disassemble ourselves, in order that we may reassemble. */
381   /*
382      if nonzero structure of submatrix B cannot change then we know that
383      no processor disassembled thus we can skip this stuff
384   */
385   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
386     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
387     if (mat->was_assembled && !other_disassembled) {
388       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
389       /* reaccess the b because aij->B was changed */
390       b    = (Mat_SeqAIJ *)aij->B->data;
391     }
392   }
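/*
   The MPI_PROD reduction above answers "did any process disassemble?":
   was_assembled is 1 on processes that are still assembled and 0 on any
   process that has called DisAssemble_MPIAIJ(), so the product over all
   ranks is 0 exactly when at least one rank disassembled.  Worked example
   (not from the original source):

       rank:           0  1  2  3
       was_assembled:  1  1  0  1   ->  product = 0  ->  everyone disassembles
*/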
393 
394   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
395     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
396   }
397 
398   b->inode.use         = PETSC_FALSE;
399   b->compressedrow.use = PETSC_TRUE;
400   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
401   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
402 
403   if (aij->rowvalues) {
404     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
405     aij->rowvalues = 0;
406   }
407 
408   /* used by MatAXPY() */
409   a->xtoy = 0; b->xtoy = 0;
410   a->XtoY = 0; b->XtoY = 0;
411 
412   PetscFunctionReturn(0);
413 }
414 
415 #undef __FUNCT__
416 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
417 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
418 {
419   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
420   PetscErrorCode ierr;
421 
422   PetscFunctionBegin;
423   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
424   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
425   PetscFunctionReturn(0);
426 }
427 
428 #undef __FUNCT__
429 #define __FUNCT__ "MatZeroRows_MPIAIJ"
430 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
431 {
432   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
433   PetscErrorCode ierr;
434   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag;
435   PetscInt       i,N,*rows,*owners = l->rowners;
436   PetscInt       *nprocs,j,idx,nsends,row;
437   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
438   PetscInt       *rvalues,count,base,slen,*source;
439   PetscInt       *lens,*lrows,*values,rstart=l->rstart;
440   MPI_Comm       comm = A->comm;
441   MPI_Request    *send_waits,*recv_waits;
442   MPI_Status     recv_status,*send_status;
443   IS             istmp;
444   PetscTruth     found;
445 
446   PetscFunctionBegin;
447   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
448   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
449 
450   /*  first count number of contributors to each processor */
451   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
452   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
453   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
454   for (i=0; i<N; i++) {
455     idx = rows[i];
456     found = PETSC_FALSE;
457     for (j=0; j<size; j++) {
458       if (idx >= owners[j] && idx < owners[j+1]) {
459         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
460       }
461     }
462     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
463   }
464   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
465 
466   /* inform other processors of number of messages and max length*/
467   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
468 
469   /* post receives:   */
470   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
471   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
472   for (i=0; i<nrecvs; i++) {
473     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
474   }
475 
476   /* do sends:
477       1) starts[i] gives the starting index in svalues for stuff going to
478          the ith processor
479   */
480   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
481   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
482   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
483   starts[0] = 0;
484   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
485   for (i=0; i<N; i++) {
486     svalues[starts[owner[i]]++] = rows[i];
487   }
488   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
489 
490   starts[0] = 0;
491   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
492   count = 0;
493   for (i=0; i<size; i++) {
494     if (nprocs[2*i+1]) {
495       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
496     }
497   }
498   ierr = PetscFree(starts);CHKERRQ(ierr);
499 
500   base = owners[rank];
501 
502   /*  wait on receives */
503   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
504   source = lens + nrecvs;
505   count  = nrecvs; slen = 0;
506   while (count) {
507     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
508     /* unpack receives into our local space */
509     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
510     source[imdex]  = recv_status.MPI_SOURCE;
511     lens[imdex]    = n;
512     slen          += n;
513     count--;
514   }
515   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
516 
517   /* move the data into the send scatter */
518   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
519   count = 0;
520   for (i=0; i<nrecvs; i++) {
521     values = rvalues + i*nmax;
522     for (j=0; j<lens[i]; j++) {
523       lrows[count++] = values[j] - base;
524     }
525   }
526   ierr = PetscFree(rvalues);CHKERRQ(ierr);
527   ierr = PetscFree(lens);CHKERRQ(ierr);
528   ierr = PetscFree(owner);CHKERRQ(ierr);
529   ierr = PetscFree(nprocs);CHKERRQ(ierr);
530 
531   /* actually zap the local rows */
532   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
533   PetscLogObjectParent(A,istmp);
534 
535   /*
536         Zero the required rows. If the "diagonal block" of the matrix
537      is square and the user wishes to set the diagonal we use separate
538      code so that MatSetValues() is not called for each diagonal allocating
539      new memory, thus calling lots of mallocs and slowing things down.
540 
541        Contributed by: Matthew Knepley
542   */
543   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
544   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
545   if (diag && (l->A->M == l->A->N)) {
546     ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
547   } else if (diag) {
548     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
549     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
550       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
551 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
552     }
553     for (i = 0; i < slen; i++) {
554       row  = lrows[i] + rstart;
555       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
556     }
557     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
558     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
559   } else {
560     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
561   }
562   ierr = ISDestroy(istmp);CHKERRQ(ierr);
563   ierr = PetscFree(lrows);CHKERRQ(ierr);
564 
565   /* wait on sends */
566   if (nsends) {
567     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
568     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
569     ierr = PetscFree(send_status);CHKERRQ(ierr);
570   }
571   ierr = PetscFree(send_waits);CHKERRQ(ierr);
572   ierr = PetscFree(svalues);CHKERRQ(ierr);
573 
574   PetscFunctionReturn(0);
575 }
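/*
   Usage sketch (illustrative, not part of the original source; matches the
   MatZeroRows(Mat,IS,const PetscScalar*) interface used in this file).
   Rows are global indices and may be owned by any process; the routine
   above routes each index to its owner first.

       IS          rows;
       PetscInt    r[3] = {0,5,17};       global row numbers
       PetscScalar one  = 1.0;
       ierr = ISCreateGeneral(PETSC_COMM_SELF,3,r,&rows);CHKERRQ(ierr);
       ierr = MatZeroRows(A,rows,&one);CHKERRQ(ierr);    zero rows, 1 on diagonal
       ierr = ISDestroy(rows);CHKERRQ(ierr);
*/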
576 
577 #undef __FUNCT__
578 #define __FUNCT__ "MatMult_MPIAIJ"
579 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
580 {
581   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
582   PetscErrorCode ierr;
583   PetscInt       nt;
584 
585   PetscFunctionBegin;
586   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
587   if (nt != A->n) {
588     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
589   }
590   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
591   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
592   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
593   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
594   PetscFunctionReturn(0);
595 }
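/*
   Per process, MatMult above computes

       y = A_d x_local + B_o x_ghost

   where A_d (aij->A) holds the locally owned columns and B_o (aij->B) the
   off-process columns.  The scatter that fills a->lvec with the needed
   ghost values is begun before the diagonal-block product, so communication
   overlaps with computation; the off-diagonal product is then added in.
*/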
596 
597 #undef __FUNCT__
598 #define __FUNCT__ "MatMultAdd_MPIAIJ"
599 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
600 {
601   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
602   PetscErrorCode ierr;
603 
604   PetscFunctionBegin;
605   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
606   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
607   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
608   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
609   PetscFunctionReturn(0);
610 }
611 
612 #undef __FUNCT__
613 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
614 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
615 {
616   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
617   PetscErrorCode ierr;
618   PetscTruth     merged;
619 
620   PetscFunctionBegin;
621   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
622   /* do nondiagonal part */
623   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
624   if (!merged) {
625     /* send it on its way */
626     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
627     /* do local part */
628     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
629     /* receive remote parts: note this assumes the values are not actually */
630     /* added in yy until the next line, */
631     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
632   } else {
633     /* do local part */
634     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
635     /* send it on its way */
636     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
637     /* values actually were received in the Begin() but we need to call this nop */
638     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
639   }
640   PetscFunctionReturn(0);
641 }
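/*
   Per process, the transpose product above is

       y = A_d^T x  (local)  +  pieces of B_o^T x scattered back to their
                                owners with ADD_VALUES

   The merged/non-merged branch only changes the ordering: when the scatter
   is merged the Begin() already moves the data, so the local product must
   be completed first; otherwise Begin()/End() bracket the local product to
   overlap communication with computation, as in MatMult().
*/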
642 
643 EXTERN_C_BEGIN
644 #undef __FUNCT__
645 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
646 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
647 {
648   MPI_Comm       comm;
649   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
650   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
651   IS             Me,Notme;
652   PetscErrorCode ierr;
653   PetscInt       M,N,first,last,*notme,i;
654   PetscMPIInt    size;
655 
656   PetscFunctionBegin;
657 
658   /* Easy test: symmetric diagonal block */
659   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
660   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
661   if (!*f) PetscFunctionReturn(0);
662   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
663   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
664   if (size == 1) PetscFunctionReturn(0);
665 
666   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
667   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
668   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
669   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
670   for (i=0; i<first; i++) notme[i] = i;
671   for (i=last; i<M; i++) notme[i-last+first] = i;
672   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
673   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
674   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
675   Aoff = Aoffs[0];
676   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
677   Boff = Boffs[0];
678   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
679   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
680   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
681   ierr = ISDestroy(Me);CHKERRQ(ierr);
682   ierr = ISDestroy(Notme);CHKERRQ(ierr);
683 
684   PetscFunctionReturn(0);
685 }
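/*
   The test above checks A^T = B blockwise: the diagonal blocks A(Me,Me) and
   B(Me,Me) are compared first (cheap and local), then the cross blocks
   A(Me,Notme) and B(Notme,Me) are extracted with MatGetSubMatrices() and
   compared, where Me = [first,last) is this process's row range and Notme
   is its complement.
*/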
686 EXTERN_C_END
687 
688 #undef __FUNCT__
689 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
690 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
691 {
692   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
693   PetscErrorCode ierr;
694 
695   PetscFunctionBegin;
696   /* do nondiagonal part */
697   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
698   /* send it on its way */
699   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
700   /* do local part */
701   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
702   /* receive remote parts */
703   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
704   PetscFunctionReturn(0);
705 }
706 
707 /*
708   This only works correctly for square matrices where the subblock A->A is the
709    diagonal block
710 */
711 #undef __FUNCT__
712 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
713 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
714 {
715   PetscErrorCode ierr;
716   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
717 
718   PetscFunctionBegin;
719   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
720   if (a->rstart != a->cstart || a->rend != a->cend) {
721     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
722   }
723   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
724   PetscFunctionReturn(0);
725 }
726 
727 #undef __FUNCT__
728 #define __FUNCT__ "MatScale_MPIAIJ"
729 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
730 {
731   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
732   PetscErrorCode ierr;
733 
734   PetscFunctionBegin;
735   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
736   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatDestroy_MPIAIJ"
742 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
743 {
744   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
745   PetscErrorCode ierr;
746 
747   PetscFunctionBegin;
748 #if defined(PETSC_USE_LOG)
749   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
750 #endif
751   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
752   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
753   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
754   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
755 #if defined (PETSC_USE_CTABLE)
756   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
757 #else
758   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
759 #endif
760   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
761   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
762   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
763   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
764   ierr = PetscFree(aij);CHKERRQ(ierr);
765 
766   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
767   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
768   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
769   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
770   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
771   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
772   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
773   PetscFunctionReturn(0);
774 }
775 
776 #undef __FUNCT__
777 #define __FUNCT__ "MatView_MPIAIJ_Binary"
778 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
779 {
780   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
781   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
782   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
783   PetscErrorCode    ierr;
784   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
785   int               fd;
786   PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
787   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
788   PetscScalar       *column_values;
789 
790   PetscFunctionBegin;
791   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
792   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
793   nz   = A->nz + B->nz;
794   if (!rank) {
795     header[0] = MAT_FILE_COOKIE;
796     header[1] = mat->M;
797     header[2] = mat->N;
798     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
799     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
800     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
801     /* get largest number of rows any processor has */
802     rlen = mat->m;
803     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
804     for (i=1; i<size; i++) {
805       rlen = PetscMax(rlen,range[i+1] - range[i]);
806     }
807   } else {
808     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
809     rlen = mat->m;
810   }
811 
812   /* load up the local row counts */
813   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
814   for (i=0; i<mat->m; i++) {
815     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
816   }
817 
818   /* store the row lengths to the file */
819   if (!rank) {
820     MPI_Status status;
821     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
822     for (i=1; i<size; i++) {
823       rlen = range[i+1] - range[i];
824       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
825       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
826     }
827   } else {
828     ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
829   }
830   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
831 
832   /* load up the local column indices */
833   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
834   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
835   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
836   cnt  = 0;
837   for (i=0; i<mat->m; i++) {
838     for (j=B->i[i]; j<B->i[i+1]; j++) {
839       if ( (col = garray[B->j[j]]) > cstart) break;
840       column_indices[cnt++] = col;
841     }
842     for (k=A->i[i]; k<A->i[i+1]; k++) {
843       column_indices[cnt++] = A->j[k] + cstart;
844     }
845     for (; j<B->i[i+1]; j++) {
846       column_indices[cnt++] = garray[B->j[j]];
847     }
848   }
849   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
850 
851   /* store the column indices to the file */
852   if (!rank) {
853     MPI_Status status;
854     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
855     for (i=1; i<size; i++) {
856       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
857       if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
858       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
859       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
860     }
861   } else {
862     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
863     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
864   }
865   ierr = PetscFree(column_indices);CHKERRQ(ierr);
866 
867   /* load up the local column values */
868   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
869   cnt  = 0;
870   for (i=0; i<mat->m; i++) {
871     for (j=B->i[i]; j<B->i[i+1]; j++) {
872       if ( garray[B->j[j]] > cstart) break;
873       column_values[cnt++] = B->a[j];
874     }
875     for (k=A->i[i]; k<A->i[i+1]; k++) {
876       column_values[cnt++] = A->a[k];
877     }
878     for (; j<B->i[i+1]; j++) {
879       column_values[cnt++] = B->a[j];
880     }
881   }
882   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
883 
884   /* store the column values to the file */
885   if (!rank) {
886     MPI_Status status;
887     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
888     for (i=1; i<size; i++) {
889       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
890       if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
891       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
892       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
893     }
894   } else {
895     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
896     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
897   }
898   ierr = PetscFree(column_values);CHKERRQ(ierr);
899   PetscFunctionReturn(0);
900 }
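/*
   Layout of the binary file produced above (integers as PETSC_INT, values
   as PETSC_SCALAR; summary, not part of the original source):

       header[4]       MAT_FILE_COOKIE, M, N, global number of nonzeros
       row lengths     M entries, gathered and written rank by rank
       column indices  nz entries per rank, global numbering, rows in order
       values          nz entries per rank, same ordering as the indices

   Within each row the global column indices are emitted in ascending order
   by merging the B entries below cstart, the A entries shifted by cstart,
   and the remaining B entries.
*/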
901 
902 #undef __FUNCT__
903 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
904 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
905 {
906   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
907   PetscErrorCode    ierr;
908   PetscMPIInt       rank = aij->rank,size = aij->size;
909   PetscTruth        isdraw,iascii,isbinary;
910   PetscViewer       sviewer;
911   PetscViewerFormat format;
912 
913   PetscFunctionBegin;
914   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
915   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
916   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
917   if (iascii) {
918     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
919     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
920       MatInfo    info;
921       Mat_SeqAIJ *a = (Mat_SeqAIJ*) aij->A->data;
922       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
923       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
924       if (!a->inode.size) {
925         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
926 					      rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
927       } else {
928         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
929 		    rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
930       }
931       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
932       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
933       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
934       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
935       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
936       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
937       PetscFunctionReturn(0);
938     } else if (format == PETSC_VIEWER_ASCII_INFO) {
939       Mat_SeqAIJ *a = (Mat_SeqAIJ*) aij->A->data;
940       if (a->inode.size) {
941         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",a->inode.node_count,a->inode.limit);CHKERRQ(ierr);
942       } else {
943         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
944       }
945       PetscFunctionReturn(0);
946     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
947       PetscFunctionReturn(0);
948     }
949   } else if (isbinary) {
950     if (size == 1) {
951       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
952       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
953     } else {
954       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
955     }
956     PetscFunctionReturn(0);
957   } else if (isdraw) {
958     PetscDraw  draw;
959     PetscTruth isnull;
960     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
961     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
962   }
963 
964   if (size == 1) {
965     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
966     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
967   } else {
968     /* assemble the entire matrix onto first processor. */
969     Mat         A;
970     Mat_SeqAIJ  *Aloc;
971     PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
972     PetscScalar *a;
973 
974     if (!rank) {
975       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
976     } else {
977       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
978     }
979     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
980     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
981     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
982     PetscLogObjectParent(mat,A);
983 
984     /* copy over the A part */
985     Aloc = (Mat_SeqAIJ*)aij->A->data;
986     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
987     row = aij->rstart;
988     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart;}
989     for (i=0; i<m; i++) {
990       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
991       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
992     }
993     aj = Aloc->j;
994     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
995 
996     /* copy over the B part */
997     Aloc = (Mat_SeqAIJ*)aij->B->data;
998     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
999     row  = aij->rstart;
1000     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1001     ct   = cols;
1002     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1003     for (i=0; i<m; i++) {
1004       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1005       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1006     }
1007     ierr = PetscFree(ct);CHKERRQ(ierr);
1008     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1009     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1010     /*
1011        Everyone has to call to draw the matrix since the graphics waits are
1012        synchronized across all processors that share the PetscDraw object
1013     */
1014     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1015     if (!rank) {
1016       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
1017       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1018     }
1019     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1020     ierr = MatDestroy(A);CHKERRQ(ierr);
1021   }
1022   PetscFunctionReturn(0);
1023 }
1024 
1025 #undef __FUNCT__
1026 #define __FUNCT__ "MatView_MPIAIJ"
1027 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1028 {
1029   PetscErrorCode ierr;
1030   PetscTruth     iascii,isdraw,issocket,isbinary;
1031 
1032   PetscFunctionBegin;
1033   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1034   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1035   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1036   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1037   if (iascii || isdraw || isbinary || issocket) {
1038     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1039   } else {
1040     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1041   }
1042   PetscFunctionReturn(0);
1043 }
1044 
1045 
1046 
1047 #undef __FUNCT__
1048 #define __FUNCT__ "MatRelax_MPIAIJ"
1049 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1050 {
1051   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1052   PetscErrorCode ierr;
1053   Vec            bb1;
1054   PetscScalar    mone=-1.0;
1055 
1056   PetscFunctionBegin;
1057   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1058 
1059   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1060 
1061   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1062     if (flag & SOR_ZERO_INITIAL_GUESS) {
1063       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1064       its--;
1065     }
1066 
1067     while (its--) {
1068       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1069       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1070 
1071       /* update rhs: bb1 = bb - B*x */
1072       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1073       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1074 
1075       /* local sweep */
1076       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1077       CHKERRQ(ierr);
1078     }
1079   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1080     if (flag & SOR_ZERO_INITIAL_GUESS) {
1081       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1082       its--;
1083     }
1084     while (its--) {
1085       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1086       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1087 
1088       /* update rhs: bb1 = bb - B*x */
1089       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1090       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1091 
1092       /* local sweep */
1093       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1094       CHKERRQ(ierr);
1095     }
1096   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1097     if (flag & SOR_ZERO_INITIAL_GUESS) {
1098       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1099       its--;
1100     }
1101     while (its--) {
1102       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1103       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1104 
1105       /* update rhs: bb1 = bb - B*x */
1106       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1107       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1108 
1109       /* local sweep */
1110       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1111       CHKERRQ(ierr);
1112     }
1113   } else {
1114     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1115   }
1116 
1117   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1118   PetscFunctionReturn(0);
1119 }
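/*
   Each outer iteration above is one step of block Jacobi with a local SOR
   solve: splitting x into the locally owned part and its ghost values,

       bb1     = bb - B_o x_ghost
       x_local = SOR(A_d, bb1, omega, lits sweeps)

   (the VecScale by -1.0 folds the minus sign into mat->lvec before the
   multadd).  Only the SOR_LOCAL_* variants are supported; a true parallel
   SOR ordering is not.
*/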
1120 
1121 #undef __FUNCT__
1122 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1123 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1124 {
1125   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1126   Mat            A = mat->A,B = mat->B;
1127   PetscErrorCode ierr;
1128   PetscReal      isend[5],irecv[5];
1129 
1130   PetscFunctionBegin;
1131   info->block_size     = 1.0;
1132   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1133   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1134   isend[3] = info->memory;  isend[4] = info->mallocs;
1135   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1136   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1137   isend[3] += info->memory;  isend[4] += info->mallocs;
1138   if (flag == MAT_LOCAL) {
1139     info->nz_used      = isend[0];
1140     info->nz_allocated = isend[1];
1141     info->nz_unneeded  = isend[2];
1142     info->memory       = isend[3];
1143     info->mallocs      = isend[4];
1144   } else if (flag == MAT_GLOBAL_MAX) {
1145     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1146     info->nz_used      = irecv[0];
1147     info->nz_allocated = irecv[1];
1148     info->nz_unneeded  = irecv[2];
1149     info->memory       = irecv[3];
1150     info->mallocs      = irecv[4];
1151   } else if (flag == MAT_GLOBAL_SUM) {
1152     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1153     info->nz_used      = irecv[0];
1154     info->nz_allocated = irecv[1];
1155     info->nz_unneeded  = irecv[2];
1156     info->memory       = irecv[3];
1157     info->mallocs      = irecv[4];
1158   }
1159   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1160   info->fill_ratio_needed = 0;
1161   info->factor_mallocs    = 0;
1162   info->rows_global       = (double)matin->M;
1163   info->columns_global    = (double)matin->N;
1164   info->rows_local        = (double)matin->m;
1165   info->columns_local     = (double)matin->N;
1166 
1167   PetscFunctionReturn(0);
1168 }
1169 
1170 #undef __FUNCT__
1171 #define __FUNCT__ "MatSetOption_MPIAIJ"
1172 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1173 {
1174   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1175   PetscErrorCode ierr;
1176 
1177   PetscFunctionBegin;
1178   switch (op) {
1179   case MAT_NO_NEW_NONZERO_LOCATIONS:
1180   case MAT_YES_NEW_NONZERO_LOCATIONS:
1181   case MAT_COLUMNS_UNSORTED:
1182   case MAT_COLUMNS_SORTED:
1183   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1184   case MAT_KEEP_ZEROED_ROWS:
1185   case MAT_NEW_NONZERO_LOCATION_ERR:
1186   case MAT_USE_INODES:
1187   case MAT_DO_NOT_USE_INODES:
1188   case MAT_IGNORE_ZERO_ENTRIES:
1189     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1190     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1191     break;
1192   case MAT_ROW_ORIENTED:
1193     a->roworiented = PETSC_TRUE;
1194     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1195     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1196     break;
1197   case MAT_ROWS_SORTED:
1198   case MAT_ROWS_UNSORTED:
1199   case MAT_YES_NEW_DIAGONALS:
1200     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1201     break;
1202   case MAT_COLUMN_ORIENTED:
1203     a->roworiented = PETSC_FALSE;
1204     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1205     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1206     break;
1207   case MAT_IGNORE_OFF_PROC_ENTRIES:
1208     a->donotstash = PETSC_TRUE;
1209     break;
1210   case MAT_NO_NEW_DIAGONALS:
1211     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1212   case MAT_SYMMETRIC:
1213   case MAT_STRUCTURALLY_SYMMETRIC:
1214   case MAT_HERMITIAN:
1215   case MAT_SYMMETRY_ETERNAL:
1216     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1217     break;
1218   case MAT_NOT_SYMMETRIC:
1219   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1220   case MAT_NOT_HERMITIAN:
1221   case MAT_NOT_SYMMETRY_ETERNAL:
1222     break;
1223   default:
1224     SETERRQ(PETSC_ERR_SUP,"unknown option");
1225   }
1226   PetscFunctionReturn(0);
1227 }
1228 
1229 #undef __FUNCT__
1230 #define __FUNCT__ "MatGetRow_MPIAIJ"
1231 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1232 {
1233   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1234   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1235   PetscErrorCode ierr;
1236   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1237   PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1238   PetscInt       *cmap,*idx_p;
1239 
1240   PetscFunctionBegin;
1241   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1242   mat->getrowactive = PETSC_TRUE;
1243 
1244   if (!mat->rowvalues && (idx || v)) {
1245     /*
1246         allocate enough space to hold information from the longest row.
1247     */
1248     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1249     PetscInt     max = 1,tmp;
1250     for (i=0; i<matin->m; i++) {
1251       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1252       if (max < tmp) { max = tmp; }
1253     }
1254     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1255     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1256   }
1257 
1258   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1259   lrow = row - rstart;
1260 
1261   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1262   if (!v)   {pvA = 0; pvB = 0;}
1263   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1264   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1265   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1266   nztot = nzA + nzB;
1267 
1268   cmap  = mat->garray;
1269   if (v  || idx) {
1270     if (nztot) {
1271       /* Sort by increasing column numbers, assuming A and B already sorted */
1272       PetscInt imark = -1;
1273       if (v) {
1274         *v = v_p = mat->rowvalues;
1275         for (i=0; i<nzB; i++) {
1276           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1277           else break;
1278         }
1279         imark = i;
1280         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1281         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1282       }
1283       if (idx) {
1284         *idx = idx_p = mat->rowindices;
1285         if (imark > -1) {
1286           for (i=0; i<imark; i++) {
1287             idx_p[i] = cmap[cworkB[i]];
1288           }
1289         } else {
1290           for (i=0; i<nzB; i++) {
1291             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1292             else break;
1293           }
1294           imark = i;
1295         }
1296         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1297         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1298       }
1299     } else {
1300       if (idx) *idx = 0;
1301       if (v)   *v   = 0;
1302     }
1303   }
1304   *nz = nztot;
1305   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1306   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
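/*
   The merged row returned above is sorted by global column because A's and
   B's rows are each sorted and B's global columns lie strictly outside
   [cstart,cend).  Resulting layout (illustrative):

       idx_p = [ cmap(B cols) < cstart | cstart + A cols | cmap(B cols) >= cend ]

   with imark recording where the B columns cross cstart.
*/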
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1312 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1313 {
1314   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1315 
1316   PetscFunctionBegin;
1317   if (!aij->getrowactive) {
1318     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1319   }
1320   aij->getrowactive = PETSC_FALSE;
1321   PetscFunctionReturn(0);
1322 }
1323 
1324 #undef __FUNCT__
1325 #define __FUNCT__ "MatNorm_MPIAIJ"
1326 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1327 {
1328   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1329   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1330   PetscErrorCode ierr;
1331   PetscInt       i,j,cstart = aij->cstart;
1332   PetscReal      sum = 0.0;
1333   PetscScalar    *v;
1334 
1335   PetscFunctionBegin;
1336   if (aij->size == 1) {
1337     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1338   } else {
1339     if (type == NORM_FROBENIUS) {
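      /* accumulate |a_ij|^2 over both the local diagonal (A) and off-diagonal (B)
         parts, then sum across all processes and take the square root */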
1340       v = amat->a;
1341       for (i=0; i<amat->nz; i++) {
1342 #if defined(PETSC_USE_COMPLEX)
1343         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1344 #else
1345         sum += (*v)*(*v); v++;
1346 #endif
1347       }
1348       v = bmat->a;
1349       for (i=0; i<bmat->nz; i++) {
1350 #if defined(PETSC_USE_COMPLEX)
1351         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1352 #else
1353         sum += (*v)*(*v); v++;
1354 #endif
1355       }
1356       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1357       *norm = sqrt(*norm);
1358     } else if (type == NORM_1) { /* max column norm */
1359       PetscReal *tmp,*tmp2;
1360       PetscInt    *jj,*garray = aij->garray;
1361       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1362       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1363       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1364       *norm = 0.0;
1365       v = amat->a; jj = amat->j;
1366       for (j=0; j<amat->nz; j++) {
1367         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1368       }
1369       v = bmat->a; jj = bmat->j;
1370       for (j=0; j<bmat->nz; j++) {
1371         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1372       }
1373       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1374       for (j=0; j<mat->N; j++) {
1375         if (tmp2[j] > *norm) *norm = tmp2[j];
1376       }
1377       ierr = PetscFree(tmp);CHKERRQ(ierr);
1378       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1379     } else if (type == NORM_INFINITY) { /* max row norm */
1380       PetscReal ntemp = 0.0;
1381       for (j=0; j<aij->A->m; j++) {
1382         v = amat->a + amat->i[j];
1383         sum = 0.0;
1384         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1385           sum += PetscAbsScalar(*v); v++;
1386         }
1387         v = bmat->a + bmat->i[j];
1388         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1389           sum += PetscAbsScalar(*v); v++;
1390         }
1391         if (sum > ntemp) ntemp = sum;
1392       }
1393       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1394     } else {
1395       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1396     }
1397   }
1398   PetscFunctionReturn(0);
1399 }
1400 
1401 #undef __FUNCT__
1402 #define __FUNCT__ "MatTranspose_MPIAIJ"
1403 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1404 {
1405   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1406   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1407   PetscErrorCode ierr;
1408   PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1409   Mat            B;
1410   PetscScalar    *array;
1411 
1412   PetscFunctionBegin;
1413   if (!matout && M != N) {
1414     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1415   }
1416 
1417   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1418   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1419   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1420 
1421   /* copy over the A part */
1422   Aloc = (Mat_SeqAIJ*)a->A->data;
1423   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1424   row = a->rstart;
1425   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart;}
1426   for (i=0; i<m; i++) {
1427     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1428     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1429   }
1430   aj = Aloc->j;
1431   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart;}
1432 
1433   /* copy over the B part */
1434   Aloc = (Mat_SeqAIJ*)a->B->data;
1435   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1436   row  = a->rstart;
1437   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1438   ct   = cols;
1439   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1440   for (i=0; i<m; i++) {
1441     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1442     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1443   }
1444   ierr = PetscFree(ct);CHKERRQ(ierr);
1445   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1446   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1447   if (matout) {
1448     *matout = B;
1449   } else {
1450     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1451   }
1452   PetscFunctionReturn(0);
1453 }
1454 
1455 #undef __FUNCT__
1456 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1457 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1458 {
1459   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1460   Mat            a = aij->A,b = aij->B;
1461   PetscErrorCode ierr;
1462   PetscInt       s1,s2,s3;
1463 
1464   PetscFunctionBegin;
1465   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1466   if (rr) {
1467     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1468     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1469     /* Overlap communication with computation. */
1470     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1471   }
1472   if (ll) {
1473     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1474     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1475     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1476   }
1477   /* scale  the diagonal block */
1478   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1479 
1480   if (rr) {
1481     /* Do a scatter end and then right scale the off-diagonal block */
1482     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1483     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1484   }
1485 
1486   PetscFunctionReturn(0);
1487 }
1488 
1489 
1490 #undef __FUNCT__
1491 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1492 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1493 {
1494   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1495   PetscErrorCode ierr;
1496 
1497   PetscFunctionBegin;
1498   if (!a->rank) {
1499     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1500   }
1501   PetscFunctionReturn(0);
1502 }
1503 
1504 #undef __FUNCT__
1505 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1506 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1507 {
1508   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1509   PetscErrorCode ierr;
1510 
1511   PetscFunctionBegin;
1512   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1513   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1514   PetscFunctionReturn(0);
1515 }
1516 #undef __FUNCT__
1517 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1518 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1519 {
1520   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1521   PetscErrorCode ierr;
1522 
1523   PetscFunctionBegin;
1524   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1525   PetscFunctionReturn(0);
1526 }
1527 
1528 #undef __FUNCT__
1529 #define __FUNCT__ "MatEqual_MPIAIJ"
1530 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1531 {
1532   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1533   Mat            a,b,c,d;
1534   PetscTruth     flg;
1535   PetscErrorCode ierr;
1536 
1537   PetscFunctionBegin;
1538   a = matA->A; b = matA->B;
1539   c = matB->A; d = matB->B;
1540 
1541   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1542   if (flg) {
1543     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1544   }
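  /* the matrices are equal only if both the diagonal and off-diagonal parts match
     on every process, hence the logical AND reduction */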
1545   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1546   PetscFunctionReturn(0);
1547 }
1548 
1549 #undef __FUNCT__
1550 #define __FUNCT__ "MatCopy_MPIAIJ"
1551 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1552 {
1553   PetscErrorCode ierr;
1554   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1555   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1556 
1557   PetscFunctionBegin;
1558   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1559   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1560     /* because of the column compression in the off-processor part of the matrix a->B,
1561        the number of columns in a->B and b->B may differ, hence we cannot call
1562        MatCopy() directly on the two parts. If need be, we could provide a copy more
1563        efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
1564        then copying the submatrices */
1565     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1566   } else {
1567     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1568     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1569   }
1570   PetscFunctionReturn(0);
1571 }
1572 
1573 #undef __FUNCT__
1574 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1575 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1576 {
1577   PetscErrorCode ierr;
1578 
1579   PetscFunctionBegin;
1580   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1581   PetscFunctionReturn(0);
1582 }
1583 
1584 #include "petscblaslapack.h"
1585 #undef __FUNCT__
1586 #define __FUNCT__ "MatAXPY_MPIAIJ"
1587 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1588 {
1589   PetscErrorCode ierr;
1590   PetscInt       i;
1591   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1592   PetscBLASInt   bnz,one=1;
1593   Mat_SeqAIJ     *x,*y;
1594 
1595   PetscFunctionBegin;
1596   if (str == SAME_NONZERO_PATTERN) {
1597     x = (Mat_SeqAIJ *)xx->A->data;
1598     y = (Mat_SeqAIJ *)yy->A->data;
1599     bnz = (PetscBLASInt)x->nz;
1600     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1601     x = (Mat_SeqAIJ *)xx->B->data;
1602     y = (Mat_SeqAIJ *)yy->B->data;
1603     bnz = (PetscBLASInt)x->nz;
1604     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1605   } else if (str == SUBSET_NONZERO_PATTERN) {
1606     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1607 
1608     x = (Mat_SeqAIJ *)xx->B->data;
1609     y = (Mat_SeqAIJ *)yy->B->data;
1610     if (y->xtoy && y->XtoY != xx->B) {
1611       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1612       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1613     }
1614     if (!y->xtoy) { /* get xtoy */
1615       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1616       y->XtoY = xx->B;
1617     }
1618     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1619   } else {
1620     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1621   }
1622   PetscFunctionReturn(0);
1623 }
1624 
1625 /* -------------------------------------------------------------------*/
1626 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1627        MatGetRow_MPIAIJ,
1628        MatRestoreRow_MPIAIJ,
1629        MatMult_MPIAIJ,
1630 /* 4*/ MatMultAdd_MPIAIJ,
1631        MatMultTranspose_MPIAIJ,
1632        MatMultTransposeAdd_MPIAIJ,
1633        0,
1634        0,
1635        0,
1636 /*10*/ 0,
1637        0,
1638        0,
1639        MatRelax_MPIAIJ,
1640        MatTranspose_MPIAIJ,
1641 /*15*/ MatGetInfo_MPIAIJ,
1642        MatEqual_MPIAIJ,
1643        MatGetDiagonal_MPIAIJ,
1644        MatDiagonalScale_MPIAIJ,
1645        MatNorm_MPIAIJ,
1646 /*20*/ MatAssemblyBegin_MPIAIJ,
1647        MatAssemblyEnd_MPIAIJ,
1648        0,
1649        MatSetOption_MPIAIJ,
1650        MatZeroEntries_MPIAIJ,
1651 /*25*/ MatZeroRows_MPIAIJ,
1652        0,
1653        0,
1654        0,
1655        0,
1656 /*30*/ MatSetUpPreallocation_MPIAIJ,
1657        0,
1658        0,
1659        0,
1660        0,
1661 /*35*/ MatDuplicate_MPIAIJ,
1662        0,
1663        0,
1664        0,
1665        0,
1666 /*40*/ MatAXPY_MPIAIJ,
1667        MatGetSubMatrices_MPIAIJ,
1668        MatIncreaseOverlap_MPIAIJ,
1669        MatGetValues_MPIAIJ,
1670        MatCopy_MPIAIJ,
1671 /*45*/ MatPrintHelp_MPIAIJ,
1672        MatScale_MPIAIJ,
1673        0,
1674        0,
1675        0,
1676 /*50*/ MatSetBlockSize_MPIAIJ,
1677        0,
1678        0,
1679        0,
1680        0,
1681 /*55*/ MatFDColoringCreate_MPIAIJ,
1682        0,
1683        MatSetUnfactored_MPIAIJ,
1684        0,
1685        0,
1686 /*60*/ MatGetSubMatrix_MPIAIJ,
1687        MatDestroy_MPIAIJ,
1688        MatView_MPIAIJ,
1689        MatGetPetscMaps_Petsc,
1690        0,
1691 /*65*/ 0,
1692        0,
1693        0,
1694        0,
1695        0,
1696 /*70*/ 0,
1697        0,
1698        MatSetColoring_MPIAIJ,
1699 #if defined(PETSC_HAVE_ADIC)
1700        MatSetValuesAdic_MPIAIJ,
1701 #else
1702        0,
1703 #endif
1704        MatSetValuesAdifor_MPIAIJ,
1705 /*75*/ 0,
1706        0,
1707        0,
1708        0,
1709        0,
1710 /*80*/ 0,
1711        0,
1712        0,
1713        0,
1714 /*84*/ MatLoad_MPIAIJ,
1715        0,
1716        0,
1717        0,
1718        0,
1719        0,
1720 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1721        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1722        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1723        MatPtAP_Basic,
1724        MatPtAPSymbolic_MPIAIJ,
1725 /*95*/ MatPtAPNumeric_MPIAIJ,
1726        0,
1727        0,
1728        0,
1729        0,
1730 /*100*/0,
1731        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1732        MatPtAPNumeric_MPIAIJ_MPIAIJ,
1733 };
1734 
1735 /* ----------------------------------------------------------------------------------------*/
1736 
1737 EXTERN_C_BEGIN
1738 #undef __FUNCT__
1739 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1740 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
1741 {
1742   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1743   PetscErrorCode ierr;
1744 
1745   PetscFunctionBegin;
1746   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1747   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1748   PetscFunctionReturn(0);
1749 }
1750 EXTERN_C_END
1751 
1752 EXTERN_C_BEGIN
1753 #undef __FUNCT__
1754 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1755 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
1756 {
1757   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1758   PetscErrorCode ierr;
1759 
1760   PetscFunctionBegin;
1761   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1762   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1763   PetscFunctionReturn(0);
1764 }
1765 EXTERN_C_END
1766 
1767 #include "petscpc.h"
1768 EXTERN_C_BEGIN
1769 #undef __FUNCT__
1770 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1771 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1772 {
1773   Mat_MPIAIJ     *b;
1774   PetscErrorCode ierr;
1775   PetscInt       i;
1776 
1777   PetscFunctionBegin;
1778   B->preallocated = PETSC_TRUE;
1779   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1780   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1781   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1782   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1783   if (d_nnz) {
1784     for (i=0; i<B->m; i++) {
1785       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1786     }
1787   }
1788   if (o_nnz) {
1789     for (i=0; i<B->m; i++) {
1790       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1791     }
1792   }
1793   b = (Mat_MPIAIJ*)B->data;
1794   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1795   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1796 
1797   PetscFunctionReturn(0);
1798 }
1799 EXTERN_C_END
1800 
1801 #undef __FUNCT__
1802 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1803 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1804 {
1805   Mat            mat;
1806   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1807   PetscErrorCode ierr;
1808 
1809   PetscFunctionBegin;
1810   *newmat       = 0;
1811   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1812   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1813   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1814   a    = (Mat_MPIAIJ*)mat->data;
1815 
1816   mat->factor       = matin->factor;
1817   mat->bs           = matin->bs;
1818   mat->assembled    = PETSC_TRUE;
1819   mat->insertmode   = NOT_SET_VALUES;
1820   mat->preallocated = PETSC_TRUE;
1821 
1822   a->rstart       = oldmat->rstart;
1823   a->rend         = oldmat->rend;
1824   a->cstart       = oldmat->cstart;
1825   a->cend         = oldmat->cend;
1826   a->size         = oldmat->size;
1827   a->rank         = oldmat->rank;
1828   a->donotstash   = oldmat->donotstash;
1829   a->roworiented  = oldmat->roworiented;
1830   a->rowindices   = 0;
1831   a->rowvalues    = 0;
1832   a->getrowactive = PETSC_FALSE;
1833 
1834   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1835   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1836   if (oldmat->colmap) {
1837 #if defined (PETSC_USE_CTABLE)
1838     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1839 #else
1840     ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1841     PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));
1842     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1843 #endif
1844   } else a->colmap = 0;
1845   if (oldmat->garray) {
1846     PetscInt len;
1847     len  = oldmat->B->n;
1848     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1849     PetscLogObjectMemory(mat,len*sizeof(PetscInt));
1850     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1851   } else a->garray = 0;
1852 
1853   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1854   PetscLogObjectParent(mat,a->lvec);
1855   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1856   PetscLogObjectParent(mat,a->Mvctx);
1857   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1858   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1859   PetscLogObjectParent(mat,a->A);
1860   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1861   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1862   PetscLogObjectParent(mat,a->B);
1863   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1864   *newmat = mat;
1865   PetscFunctionReturn(0);
1866 }
1867 
1868 #include "petscsys.h"
1869 
1870 #undef __FUNCT__
1871 #define __FUNCT__ "MatLoad_MPIAIJ"
1872 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1873 {
1874   Mat            A;
1875   PetscScalar    *vals,*svals;
1876   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1877   MPI_Status     status;
1878   PetscErrorCode ierr;
1879   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1880   PetscInt       i,nz,j,rstart,rend;
1881   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1882   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1883   PetscInt       cend,cstart,n,*rowners;
1884   int            fd;
1885 
1886   PetscFunctionBegin;
1887   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1888   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1889   if (!rank) {
1890     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1891     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1892     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1893   }
1894 
1895   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1896   M = header[1]; N = header[2];
1897   /* determine ownership of all rows */
1898   m    = M/size + ((M % size) > rank);
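  /* each process takes M/size rows; the first M % size processes take one extra */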
1899   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1900   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1901   rowners[0] = 0;
1902   for (i=2; i<=size; i++) {
1903     rowners[i] += rowners[i-1];
1904   }
1905   rstart = rowners[rank];
1906   rend   = rowners[rank+1];
1907 
1908   /* distribute row lengths to all processors */
1909   ierr    = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr);
1910   if (!rank) {
1911     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
1912     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1913     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1914     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1915     for (j=0; j<m; j++) {
1916       procsnz[0] += ourlens[j];
1917     }
1918     for (i=1; i<size; i++) {
1919       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
1920       /* calculate the number of nonzeros on each processor */
1921       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
1922         procsnz[i] += rowlengths[j];
1923       }
1924       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1925     }
1926     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1927   } else {
1928     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1929   }
1930 
1931   if (!rank) {
1932     /* determine max buffer needed and allocate it */
1933     maxnz = 0;
1934     for (i=0; i<size; i++) {
1935       maxnz = PetscMax(maxnz,procsnz[i]);
1936     }
1937     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1938 
1939     /* read in my part of the matrix column indices  */
1940     nz   = procsnz[0];
1941     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1942     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1943 
1944     /* read in everyone else's part and ship it off */
1945     for (i=1; i<size; i++) {
1946       nz   = procsnz[i];
1947       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1948       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1949     }
1950     ierr = PetscFree(cols);CHKERRQ(ierr);
1951   } else {
1952     /* determine buffer space needed for message */
1953     nz = 0;
1954     for (i=0; i<m; i++) {
1955       nz += ourlens[i];
1956     }
1957     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1958 
1959     /* receive message of column indices */
1960     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1961     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1962     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1963   }
1964 
1965   /* determine column ownership if matrix is not square */
1966   if (N != M) {
1967     n      = N/size + ((N % size) > rank);
1968     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1969     cstart = cend - n;
1970   } else {
1971     cstart = rstart;
1972     cend   = rend;
1973     n      = cend - cstart;
1974   }
1975 
1976   /* loop over local rows, determining number of off diagonal entries */
1977   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1978   jj = 0;
1979   for (i=0; i<m; i++) {
1980     for (j=0; j<ourlens[i]; j++) {
1981       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1982       jj++;
1983     }
1984   }
1985 
1986   /* create our matrix */
1987   for (i=0; i<m; i++) {
1988     ourlens[i] -= offlens[i];
1989   }
1990   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
1991   ierr = MatSetType(A,type);CHKERRQ(ierr);
1992   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
1993 
1994   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1995   for (i=0; i<m; i++) {
1996     ourlens[i] += offlens[i];
1997   }
1998 
1999   if (!rank) {
2000     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2001 
2002     /* read in my part of the matrix numerical values  */
2003     nz   = procsnz[0];
2004     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2005 
2006     /* insert into matrix */
2007     jj      = rstart;
2008     smycols = mycols;
2009     svals   = vals;
2010     for (i=0; i<m; i++) {
2011       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2012       smycols += ourlens[i];
2013       svals   += ourlens[i];
2014       jj++;
2015     }
2016 
2017     /* read in other processors and ship out */
2018     for (i=1; i<size; i++) {
2019       nz   = procsnz[i];
2020       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2021       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2022     }
2023     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2024   } else {
2025     /* receive numeric values */
2026     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2027 
2028     /* receive message of values */
2029     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2030     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2031     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2032 
2033     /* insert into matrix */
2034     jj      = rstart;
2035     smycols = mycols;
2036     svals   = vals;
2037     for (i=0; i<m; i++) {
2038       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2039       smycols += ourlens[i];
2040       svals   += ourlens[i];
2041       jj++;
2042     }
2043   }
2044   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2045   ierr = PetscFree(vals);CHKERRQ(ierr);
2046   ierr = PetscFree(mycols);CHKERRQ(ierr);
2047   ierr = PetscFree(rowners);CHKERRQ(ierr);
2048 
2049   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2050   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2051   *newmat = A;
2052   PetscFunctionReturn(0);
2053 }
2054 
2055 #undef __FUNCT__
2056 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2057 /*
2058     Not great since it makes two copies of the submatrix: first a SeqAIJ copy on
2059   each process, and then the end result obtained by concatenating those local
2060   matrices. Writing it directly would look much like MatGetSubMatrices_MPIAIJ().
2061 */
2062 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2063 {
2064   PetscErrorCode ierr;
2065   PetscMPIInt    rank,size;
2066   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2067   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2068   Mat            *local,M,Mreuse;
2069   PetscScalar    *vwork,*aa;
2070   MPI_Comm       comm = mat->comm;
2071   Mat_SeqAIJ     *aij;
2072 
2073 
2074   PetscFunctionBegin;
2075   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2076   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2077 
2078   if (call ==  MAT_REUSE_MATRIX) {
2079     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2080     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2081     local = &Mreuse;
2082     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2083   } else {
2084     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2085     Mreuse = *local;
2086     ierr   = PetscFree(local);CHKERRQ(ierr);
2087   }
2088 
2089   /*
2090       m - number of local rows
2091       n - number of columns (same on all processors)
2092       rstart - first row in new global matrix generated
2093   */
2094   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2095   if (call == MAT_INITIAL_MATRIX) {
2096     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2097     ii  = aij->i;
2098     jj  = aij->j;
2099 
2100     /*
2101         Determine the number of non-zeros in the diagonal and off-diagonal
2102         portions of the matrix in order to do correct preallocation
2103     */
2104 
2105     /* first get start and end of "diagonal" columns */
2106     if (csize == PETSC_DECIDE) {
2107       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2108       if (mglobal == n) { /* square matrix */
2109         nlocal = m;
2110       } else {
2111         nlocal = n/size + ((n % size) > rank);
2112       }
2113     } else {
2114       nlocal = csize;
2115     }
2116     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2117     rstart = rend - nlocal;
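    /* MPI_Scan returned the inclusive prefix sum of nlocal, i.e. one past this
       process's last "diagonal" column; subtracting nlocal gives the first one */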
2118     if (rank == size - 1 && rend != n) {
2119       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2120     }
2121 
2122     /* next, compute all the lengths */
2123     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2124     olens = dlens + m;
2125     for (i=0; i<m; i++) {
2126       jend = ii[i+1] - ii[i];
2127       olen = 0;
2128       dlen = 0;
2129       for (j=0; j<jend; j++) {
2130         if (*jj < rstart || *jj >= rend) olen++;
2131         else dlen++;
2132         jj++;
2133       }
2134       olens[i] = olen;
2135       dlens[i] = dlen;
2136     }
2137     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2138     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2139     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2140     ierr = PetscFree(dlens);CHKERRQ(ierr);
2141   } else {
2142     PetscInt ml,nl;
2143 
2144     M = *newmat;
2145     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2146     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2147     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2148     /*
2149          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2150        rather than the slower MatSetValues().
2151     */
2152     M->was_assembled = PETSC_TRUE;
2153     M->assembled     = PETSC_FALSE;
2154   }
2155   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2156   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2157   ii  = aij->i;
2158   jj  = aij->j;
2159   aa  = aij->a;
2160   for (i=0; i<m; i++) {
2161     row   = rstart + i;
2162     nz    = ii[i+1] - ii[i];
2163     cwork = jj;     jj += nz;
2164     vwork = aa;     aa += nz;
2165     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2166   }
2167 
2168   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2169   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2170   *newmat = M;
2171 
2172   /* save submatrix used in processor for next request */
2173   if (call ==  MAT_INITIAL_MATRIX) {
2174     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2175     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2176   }
2177 
2178   PetscFunctionReturn(0);
2179 }
2180 
2181 EXTERN_C_BEGIN
2182 #undef __FUNCT__
2183 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2184 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2185 {
2186   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2187   PetscInt       m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2188   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2189   const PetscInt *JJ;
2190   PetscScalar    *values;
2191   PetscErrorCode ierr;
2192 
2193   PetscFunctionBegin;
2194 #if defined(PETSC_OPT_g)
2195   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0, it is %D",I[0]);
2196 #endif
2197   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2198   o_nnz = d_nnz + m;
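  /* split each row's nonzero count into entries inside the diagonal block
     [cstart,cend) (d_nnz) and the off-diagonal remainder (o_nnz); this relies on
     the column indices within each row being sorted */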
2199 
2200   for (i=0; i<m; i++) {
2201     nnz     = I[i+1]- I[i];
2202     JJ      = J + I[i];
2203     nnz_max = PetscMax(nnz_max,nnz);
2204 #if defined(PETSC_OPT_g)
2205     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz);
2206 #endif
2207     for (j=0; j<nnz; j++) {
2208       if (*JJ >= cstart) break;
2209       JJ++;
2210     }
2211     d = 0;
2212     for (; j<nnz; j++) {
2213       if (*JJ++ >= cend) break;
2214       d++;
2215     }
2216     d_nnz[i] = d;
2217     o_nnz[i] = nnz - d;
2218   }
2219   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2220   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2221 
2222   if (v) values = (PetscScalar*)v;
2223   else {
2224     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2225     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2226   }
2227 
2228   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2229   for (i=0; i<m; i++) {
2230     ii   = i + rstart;
2231     nnz  = I[i+1]- I[i];
2232     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr);
2233   }
2234   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2235   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2236   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2237 
2238   if (!v) {
2239     ierr = PetscFree(values);CHKERRQ(ierr);
2240   }
2241   PetscFunctionReturn(0);
2242 }
2243 EXTERN_C_END
2244 
2245 #undef __FUNCT__
2246 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2247 /*@C
2248    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2249    (the default parallel PETSc format).
2250 
2251    Collective on MPI_Comm
2252 
2253    Input Parameters:
2254 +  A - the matrix
2255 .  i - the indices into j for the start of each local row (starts with zero)
2256 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2257 -  v - optional values in the matrix
2258 
2259    Level: developer
2260 
2261 .keywords: matrix, aij, compressed row, sparse, parallel
2262 
2263 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2264 @*/
2265 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2266 {
2267   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2268 
2269   PetscFunctionBegin;
2270   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2271   if (f) {
2272     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2273   }
2274   PetscFunctionReturn(0);
2275 }
2276 
2277 #undef __FUNCT__
2278 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2279 /*@C
2280    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2281    (the default parallel PETSc format).  For good matrix assembly performance
2282    the user should preallocate the matrix storage by setting the parameters
2283    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2284    performance can be increased by more than a factor of 50.
2285 
2286    Collective on MPI_Comm
2287 
2288    Input Parameters:
2289 +  A - the matrix
2290 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2291            (same value is used for all local rows)
2292 .  d_nnz - array containing the number of nonzeros in the various rows of the
2293            DIAGONAL portion of the local submatrix (possibly different for each row)
2294            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2295            The size of this array is equal to the number of local rows, i.e 'm'.
2296            You must leave room for the diagonal entry even if it is zero.
2297 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2298            submatrix (same value is used for all local rows).
2299 -  o_nnz - array containing the number of nonzeros in the various rows of the
2300            OFF-DIAGONAL portion of the local submatrix (possibly different for
2301            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2302            structure. The size of this array is equal to the number
2303            of local rows, i.e 'm'.
2304 
2305    If the *_nnz parameter is given then the *_nz parameter is ignored
2306 
2307    The AIJ format (also called the Yale sparse matrix format or
2308    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2309    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2310 
2311    The parallel matrix is partitioned such that the first m0 rows belong to
2312    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2313    to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
2314 
2315    The DIAGONAL portion of the local submatrix of a processor can be defined
2316    as the submatrix which is obtained by extracting the part corresponding
2317    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2318    first row that belongs to the processor, and r2 is the last row belonging
2319    to this processor. This is a square mxm matrix. The remaining portion
2320    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2321 
2322    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2323 
2324    Example usage:
2325 
2326    Consider the following 8x8 matrix with 34 non-zero values, that is
2327    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2328    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2329    as follows:
2330 
2331 .vb
2332             1  2  0  |  0  3  0  |  0  4
2333     Proc0   0  5  6  |  7  0  0  |  8  0
2334             9  0 10  | 11  0  0  | 12  0
2335     -------------------------------------
2336            13  0 14  | 15 16 17  |  0  0
2337     Proc1   0 18  0  | 19 20 21  |  0  0
2338             0  0  0  | 22 23  0  | 24  0
2339     -------------------------------------
2340     Proc2  25 26 27  |  0  0 28  | 29  0
2341            30  0  0  | 31 32 33  |  0 34
2342 .ve
2343 
2344    This can be represented as a collection of submatrices as:
2345 
2346 .vb
2347       A B C
2348       D E F
2349       G H I
2350 .ve
2351 
2352    Where the submatrices A,B,C are owned by proc0, D,E,F are
2353    owned by proc1, G,H,I are owned by proc2.
2354 
2355    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2356    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2357    The 'M','N' parameters are 8,8, and have the same values on all procs.
2358 
2359    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2360    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2361    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2362    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2363    part as SeqAIJ matrices, e.g. proc1 will store [E] as a SeqAIJ
2364    matrix, and [DF] as another SeqAIJ matrix.
2365 
2366    When d_nz, o_nz parameters are specified, d_nz storage elements are
2367    allocated for every row of the local diagonal submatrix, and o_nz
2368    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2369    One way to choose d_nz and o_nz is to use the max nonzeros per local
2370    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2371    In this case, the values of d_nz,o_nz are:
2372 .vb
2373      proc0 : dnz = 2, o_nz = 2
2374      proc1 : dnz = 3, o_nz = 2
2375      proc2 : dnz = 1, o_nz = 4
2376 .ve
2377    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2378    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2379    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2380    34 values.
2381 
2382    When d_nnz, o_nnz parameters are specified, the storage is specified
2383    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2384    In the above case the values for d_nnz,o_nnz are:
2385 .vb
2386      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2387      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2388      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2389 .ve
2390    Here the space allocated is the sum of all the above values, i.e. 34, and
2391    hence pre-allocation is perfect.
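
   A call sketch using the per-row arrays above (illustrative; these are the
   hypothetical proc1 values from the example):
.vb
      PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve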
2392 
2393    Level: intermediate
2394 
2395 .keywords: matrix, aij, compressed row, sparse, parallel
2396 
2397 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2398           MPIAIJ
2399 @*/
2400 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2401 {
2402   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2403 
2404   PetscFunctionBegin;
2405   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2406   if (f) {
2407     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2408   }
2409   PetscFunctionReturn(0);
2410 }
2411 
2412 #undef __FUNCT__
2413 #define __FUNCT__ "MatCreateMPIAIJ"
2414 /*@C
2415    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2416    (the default parallel PETSc format).  For good matrix assembly performance
2417    the user should preallocate the matrix storage by setting the parameters
2418    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2419    performance can be increased by more than a factor of 50.
2420 
2421    Collective on MPI_Comm
2422 
2423    Input Parameters:
2424 +  comm - MPI communicator
2425 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2426            This value should be the same as the local size used in creating the
2427            y vector for the matrix-vector product y = Ax.
2428 .  n - This value should be the same as the local size used in creating the
2429        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2430        calculated if N is given) For square matrices n is almost always m.
2431 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2432 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2433 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2434            (same value is used for all local rows)
2435 .  d_nnz - array containing the number of nonzeros in the various rows of the
2436            DIAGONAL portion of the local submatrix (possibly different for each row)
2437            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2438            The size of this array is equal to the number of local rows, i.e 'm'.
2439            You must leave room for the diagonal entry even if it is zero.
2440 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2441            submatrix (same value is used for all local rows).
2442 -  o_nnz - array containing the number of nonzeros in the various rows of the
2443            OFF-DIAGONAL portion of the local submatrix (possibly different for
2444            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2445            structure. The size of this array is equal to the number
2446            of local rows, i.e 'm'.
2447 
2448    Output Parameter:
2449 .  A - the matrix
2450 
2451    Notes:
2452    If the *_nnz parameter is given then the *_nz parameter is ignored
2453 
2454    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2455    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2456    storage requirements for this matrix.
2457 
2458    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2459    processor then it must be used on all processors that share the object for
2460    that argument.
2461 
2462    The user MUST specify either the local or global matrix dimensions
2463    (possibly both).
2464 
2465    The parallel matrix is partitioned such that the first m0 rows belong to
2466    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2467    to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
2468 
2469    The DIAGONAL portion of the local submatrix of a processor can be defined
2470    as the submatrix which is obtained by extracting the part corresponding
2471    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2472    first row that belongs to the processor, and r2 is the last row belonging
2473    to this processor. This is a square mxm matrix. The remaining portion
2474    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2475 
2476    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2477 
2478    When calling this routine with a single process communicator, a matrix of
2479    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2480    type of communicator, use the construction mechanism:
2481      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
2482 
2483    By default, this format uses inodes (identical nodes) when possible.
2484    We search for consecutive rows with the same nonzero structure, thereby
2485    reusing matrix information to achieve increased efficiency.
2486 
2487    Options Database Keys:
2488 +  -mat_aij_no_inode  - Do not use inodes
2489 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2490 -  -mat_aij_oneindex - Internally use indexing starting at 1
2491         rather than 0.  Note that when calling MatSetValues(),
2492         the user still MUST index entries starting at 0!
2493 
2494 
2495    Example usage:
2496 
2497    Consider the following 8x8 matrix with 34 non-zero values, that is
2498    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2499    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2500    as follows:
2501 
2502 .vb
2503             1  2  0  |  0  3  0  |  0  4
2504     Proc0   0  5  6  |  7  0  0  |  8  0
2505             9  0 10  | 11  0  0  | 12  0
2506     -------------------------------------
2507            13  0 14  | 15 16 17  |  0  0
2508     Proc1   0 18  0  | 19 20 21  |  0  0
2509             0  0  0  | 22 23  0  | 24  0
2510     -------------------------------------
2511     Proc2  25 26 27  |  0  0 28  | 29  0
2512            30  0  0  | 31 32 33  |  0 34
2513 .ve
2514 
2515    This can be represented as a collection of submatrices as:
2516 
2517 .vb
2518       A B C
2519       D E F
2520       G H I
2521 .ve
2522 
2523    Where the submatrices A,B,C are owned by proc0, D,E,F are
2524    owned by proc1, G,H,I are owned by proc2.
2525 
2526    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2527    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2528    The 'M','N' parameters are 8,8, and have the same values on all procs.
2529 
2530    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2531    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2532    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2533    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2534    part as SeqAIJ matrices, e.g. proc1 will store [E] as a SeqAIJ
2535    matrix, and [DF] as another SeqAIJ matrix.
2536 
2537    When d_nz, o_nz parameters are specified, d_nz storage elements are
2538    allocated for every row of the local diagonal submatrix, and o_nz
2539    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2540    One way to choose d_nz and o_nz is to use the max nonzeros per local
2541    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2542    In this case, the values of d_nz,o_nz are:
2543 .vb
2544      proc0 : dnz = 2, o_nz = 2
2545      proc1 : dnz = 3, o_nz = 2
2546      proc2 : dnz = 1, o_nz = 4
2547 .ve
2548    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2549    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2550    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2551    34 values.
2552 
2553    When d_nnz, o_nnz parameters are specified, the storage is specified
2554    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2555    In the above case the values for d_nnz,o_nnz are:
2556 .vb
2557      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2558      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2559      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2560 .ve
2561    Here the space allocated is the sum of all the above values, i.e. 34, and
2562    hence pre-allocation is perfect.
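
   A creation sketch (illustrative; the sizes and per-row counts are the
   hypothetical proc1 values from the example above):
.vb
      PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
      Mat      A;
      ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve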
2563 
2564    Level: intermediate
2565 
2566 .keywords: matrix, aij, compressed row, sparse, parallel
2567 
2568 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2569           MPIAIJ
2570 @*/
2571 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2572 {
2573   PetscErrorCode ierr;
2574   PetscMPIInt    size;
2575 
2576   PetscFunctionBegin;
2577   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2578   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2579   if (size > 1) {
2580     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2581     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2582   } else {
2583     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2584     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2585   }
2586   PetscFunctionReturn(0);
2587 }
2588 
2589 #undef __FUNCT__
2590 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2591 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2592 {
2593   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2594 
2595   PetscFunctionBegin;
2596   *Ad     = a->A;
2597   *Ao     = a->B;
2598   *colmap = a->garray;
2599   PetscFunctionReturn(0);
2600 }
2601 
2602 #undef __FUNCT__
2603 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2604 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2605 {
2606   PetscErrorCode ierr;
2607   PetscInt       i;
2608   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2609 
2610   PetscFunctionBegin;
2611   if (coloring->ctype == IS_COLORING_LOCAL) {
2612     ISColoringValue *allcolors,*colors;
2613     ISColoring      ocoloring;
2614 
2615     /* set coloring for diagonal portion */
2616     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2617 
2618     /* set coloring for off-diagonal portion */
2619     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2620     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2621     for (i=0; i<a->B->n; i++) {
2622       colors[i] = allcolors[a->garray[i]];
2623     }
2624     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2625     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2626     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2627     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2628   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2629     ISColoringValue *colors;
2630     PetscInt        *larray;
2631     ISColoring      ocoloring;
2632 
2633     /* set coloring for diagonal portion */
2634     ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2635     for (i=0; i<a->A->n; i++) {
2636       larray[i] = i + a->cstart;
2637     }
2638     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2639     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2640     for (i=0; i<a->A->n; i++) {
2641       colors[i] = coloring->colors[larray[i]];
2642     }
2643     ierr = PetscFree(larray);CHKERRQ(ierr);
2644     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2645     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2646     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2647 
2648     /* set coloring for off-diagonal portion */
2649     ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2650     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2651     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2652     for (i=0; i<a->B->n; i++) {
2653       colors[i] = coloring->colors[larray[i]];
2654     }
2655     ierr = PetscFree(larray);CHKERRQ(ierr);
2656     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2657     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2658     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2659   } else {
2660     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2661   }
2662 
2663   PetscFunctionReturn(0);
2664 }
2665 
2666 #if defined(PETSC_HAVE_ADIC)
2667 #undef __FUNCT__
2668 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2669 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2670 {
2671   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2672   PetscErrorCode ierr;
2673 
2674   PetscFunctionBegin;
2675   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2676   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2677   PetscFunctionReturn(0);
2678 }
2679 #endif
2680 
2681 #undef __FUNCT__
2682 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2683 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2684 {
2685   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2686   PetscErrorCode ierr;
2687 
2688   PetscFunctionBegin;
2689   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2690   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2691   PetscFunctionReturn(0);
2692 }
2693 
2694 #undef __FUNCT__
2695 #define __FUNCT__ "MatMerge"
2696 /*@C
2697       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2698                  matrices from each processor
2699 
2700     Collective on MPI_Comm
2701 
2702    Input Parameters:
2703 +    comm - the communicator the parallel matrix will live on
2704 .    inmat - the input sequential matrix on this process
2705 .    n - number of local columns (or PETSC_DECIDE)
2706 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2707 
2708    Output Parameter:
2709 .    outmat - the parallel matrix generated
2710 
2711     Level: advanced
2712 
2713    Notes: The number of columns of the matrix in EACH processor MUST be the same.
2714 
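   Usage sketch (illustrative; note that this routine destroys inmat):
.vb
      Mat seq,par;
      ... assemble the local sequential matrix seq ...
      ierr = MatMerge(PETSC_COMM_WORLD,seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&par);CHKERRQ(ierr);
.ve
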
2715 @*/
2716 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2717 {
2718   PetscErrorCode ierr;
2719   PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
2720   PetscInt       *indx;
2721   PetscScalar    *values;
2722   PetscMap       columnmap,rowmap;
2723 
2724   PetscFunctionBegin;
2725   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2726   /*
2727   PetscMPIInt       rank;
2728   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2729   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N);
2730   */
2731   if (scall == MAT_INITIAL_MATRIX){
2732     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2733     if (n == PETSC_DECIDE){
2734       ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2735       ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
2736       ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2737       ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2738       ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2739     }
2740 
2741     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2742     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2743     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2744     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2745     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2746 
2747     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2748     for (i=0;i<m;i++) {
2749       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2750       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2751       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2752     }
2753     /* This routine will ONLY return an MPIAIJ type matrix */
2754     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2755     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2756     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2757     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2758 
2759   } else if (scall == MAT_REUSE_MATRIX){
2760     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2761   } else {
2762     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2763   }
2764 
2765   for (i=0;i<m;i++) {
2766     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2767     I    = i + rstart;
2768     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2769     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2770   }
2771   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2772   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2773   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2774 
2775   PetscFunctionReturn(0);
2776 }
2777 
2778 #undef __FUNCT__
2779 #define __FUNCT__ "MatFileSplit"
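/*
   MatFileSplit - editorial summary of the routine below: extracts the local
   rows of the parallel matrix A into a sequential matrix and writes that
   matrix to the binary file <outfile>.<rank>, one file per process.
*/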
2780 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2781 {
2782   PetscErrorCode    ierr;
2783   PetscMPIInt       rank;
2784   PetscInt          m,N,i,rstart,nnz;
2785   size_t            len;
2786   const PetscInt    *indx;
2787   PetscViewer       out;
2788   char              *name;
2789   Mat               B;
2790   const PetscScalar *values;
2791 
2792   PetscFunctionBegin;
2793   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2794   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2795   /* Should this be the type of the diagonal block of A? */
2796   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2797   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2798   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2799   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2800   for (i=0;i<m;i++) {
2801     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2802     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2803     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2804   }
2805   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2806   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2807 
2808   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2809   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc((len+8)*sizeof(char),&name);CHKERRQ(ierr); /* room for ".<rank>" with ranks beyond 999 */
2811   sprintf(name,"%s.%d",outfile,rank);
2812   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
2814   ierr = MatView(B,out);CHKERRQ(ierr);
2815   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2816   ierr = MatDestroy(B);CHKERRQ(ierr);
2817   PetscFunctionReturn(0);
2818 }
2819 
2820 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2821 #undef __FUNCT__
2822 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2823 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2824 {
2825   PetscErrorCode       ierr;
2826   Mat_Merge_SeqsToMPI  *merge;
2827   PetscObjectContainer container;
2828 
2829   PetscFunctionBegin;
2830   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2831   if (container) {
2832     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2833     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2834     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2835     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2836     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2837     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2838     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2839     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2840     ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2841     if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);}
2842     if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);}
2843     if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);}
2844 
    ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
    ierr = PetscFree(merge);CHKERRQ(ierr); /* merge is only valid when the container exists */
  }

2850   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2851   PetscFunctionReturn(0);
2852 }
2853 
2854 #include "src/mat/utils/freespace.h"
2855 #include "petscbt.h"
2856 #undef __FUNCT__
2857 #define __FUNCT__ "MatMerge_SeqsToMPI"
2858 /*@C
      MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
                 matrices from each processor
2861 
2862     Collective on MPI_Comm
2863 
2864    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix (one per process)
2867 .    m - number of local rows (or PETSC_DECIDE)
2868 .    n - number of local columns (or PETSC_DECIDE)
2869 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2870 
2871    Output Parameter:
2872 .    mpimat - the parallel matrix generated
2873 
2874     Level: advanced
2875 
2876    Notes:
     The dimensions of the sequential matrix on each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI" and is
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to gain access to seqmat.
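
   Example usage (editorial sketch; names are placeholders):
.vb
      Mat seqmat,mpimat;
      /* symbolic + numeric merge on first use */
      ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
      /* later, with new values but the same nonzero pattern, redo only the numeric phase */
      ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
.ve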
2880 @*/
static PetscEvent logkey_seqstompinum = 0;
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPINumeric"
PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2883 {
2884   PetscErrorCode       ierr;
2885   MPI_Comm             comm=mpimat->comm;
2886   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2887   PetscMPIInt          size,rank,taga,*len_s;
2888   PetscInt             N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
2889   PetscInt             proc,m;
2890   PetscInt             **buf_ri,**buf_rj;
2891   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2892   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2893   MPI_Request          *s_waits,*r_waits;
2894   MPI_Status           *status;
2895   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2896   Mat_Merge_SeqsToMPI  *merge;
2897   PetscObjectContainer container;
2898 
2899   PetscFunctionBegin;
2900   if (!logkey_seqstompinum) {
    ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr);
2902   }
2903   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2904 
2905   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2906   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2907 
2908   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (!container) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat was not created by MatMerge_SeqsToMPISymbolic()");
  ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2912   bi     = merge->bi;
2913   bj     = merge->bj;
2914   buf_ri = merge->buf_ri;
2915   buf_rj = merge->buf_rj;
2916 
2917   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2918   ierr   = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2919   len_s  = merge->len_s;
2920 
2921   /* send and recv matrix values */
2922   /*-----------------------------*/
2923   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
2924   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
2925 
2926   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2927   for (proc=0,k=0; proc<size; proc++){
2928     if (!len_s[proc]) continue;
2929     i = owners[proc];
2930     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
2931     k++;
2932   }
2933 
2934   ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
2935   ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
2936   ierr = PetscFree(status);CHKERRQ(ierr);
2937 
2938   ierr = PetscFree(s_waits);CHKERRQ(ierr);
2939   ierr = PetscFree(r_waits);CHKERRQ(ierr);
2940 
2941   /* insert mat values of mpimat */
2942   /*----------------------------*/
2943   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
2944   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
2945   nextrow = buf_ri_k + merge->nrecv;
2946   nextai  = nextrow + merge->nrecv;
2947 
2948   for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
2953   }
2954 
2955   /* set values of ba */
2956   ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
2957   for (i=0; i<m; i++) {
2958     arow = owners[rank] + i;
2959     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
2960     bnzi = bi[i+1] - bi[i];
2961     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
2962 
2963     /* add local non-zero vals of this proc's seqmat into ba */
2964     anzi = ai[arow+1] - ai[arow];
2965     aj   = a->j + ai[arow];
2966     aa   = a->a + ai[arow];
2967     nextaj = 0;
2968     for (j=0; nextaj<anzi; j++){
2969       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2970         ba_i[j] += aa[nextaj++];
2971       }
2972     }
2973 
2974     /* add received vals into ba */
2975     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
2976       /* i-th row */
2977       if (i == *nextrow[k]) {
2978         anzi = *(nextai[k]+1) - *nextai[k];
2979         aj   = buf_rj[k] + *(nextai[k]);
2980         aa   = abuf_r[k] + *(nextai[k]);
2981         nextaj = 0;
2982         for (j=0; nextaj<anzi; j++){
2983           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2984             ba_i[j] += aa[nextaj++];
2985           }
2986         }
2987         nextrow[k]++; nextai[k]++;
2988       }
2989     }
2990     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
2991   }
2992   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2993   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2994 
2995   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
2996   ierr = PetscFree(ba_i);CHKERRQ(ierr);
2997   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
2998   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2999   PetscFunctionReturn(0);
3000 }
static PetscEvent logkey_seqstompisym = 0;
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3003 {
3004   PetscErrorCode       ierr;
3005   Mat                  B_mpi;
3006   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3007   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3008   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3009   PetscInt             M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
3010   PetscInt             len,proc,*dnz,*onz;
3011   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3012   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3013   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3014   MPI_Status           *status;
3015   FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
3016   PetscBT              lnkbt;
3017   Mat_Merge_SeqsToMPI  *merge;
3018   PetscObjectContainer container;
3019 
3020   PetscFunctionBegin;
3021   if (!logkey_seqstompisym) {
    ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr);
3023   }
3024   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3025 
3026   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3027   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3028 
3029   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3030   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3031 
3032   /* determine row ownership */
3033   /*---------------------------------------------------------*/
3034   ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
3035   if (m == PETSC_DECIDE) {
3036     ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
3037   } else {
3038     ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
3039   }
3040   ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
3041   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3042   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3043 
3044   if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); }
3045   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
3046 
3047   /* determine the number of messages to send, their lengths */
3048   /*---------------------------------------------------------*/
3049   len_s  = merge->len_s;
3050 
3051   len = 0;  /* length of buf_si[] */
3052   merge->nsend = 0;
3053   for (proc=0; proc<size; proc++){
3054     len_si[proc] = 0;
3055     if (proc == rank){
3056       len_s[proc] = 0;
3057     } else {
3058       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3059       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3060     }
3061     if (len_s[proc]) {
3062       merge->nsend++;
3063       nrows = 0;
3064       for (i=owners[proc]; i<owners[proc+1]; i++){
3065         if (ai[i+1] > ai[i]) nrows++;
3066       }
3067       len_si[proc] = 2*(nrows+1);
3068       len += len_si[proc];
3069     }
3070   }
3071 
3072   /* determine the number and length of messages to receive for ij-structure */
3073   /*-------------------------------------------------------------------------*/
3074   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3075   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3076 
3077   /* post the Irecv of j-structure */
3078   /*-------------------------------*/
3079   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
3080   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3081 
3082   /* post the Isend of j-structure */
3083   /*--------------------------------*/
3084   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3085   sj_waits = si_waits + merge->nsend;
3086 
3087   for (proc=0, k=0; proc<size; proc++){
3088     if (!len_s[proc]) continue;
3089     i = owners[proc];
3090     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3091     k++;
3092   }
3093 
3094   /* receives and sends of j-structure are complete */
3095   /*------------------------------------------------*/
3096   ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
3097   ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);
3098 
3099   /* send and recv i-structure */
3100   /*---------------------------*/
3101   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
3102   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3103 
3104   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3105   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3106   for (proc=0,k=0; proc<size; proc++){
3107     if (!len_s[proc]) continue;
3108     /* form outgoing message for i-structure:
3109          buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (local to the receiving process)
3111                [nrows+1:2*nrows+1]: i-structure index
3112     */
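    /* editorial illustration: if only rows 2 and 5 (receiver's local numbering) are
       sent, with 3 and 4 nonzeros respectively, then buf_si = {2, 2,5, 0,3,7} and
       len_si[proc] = 2*(nrows+1) = 6 */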
3113     /*-------------------------------------------*/
3114     nrows = len_si[proc]/2 - 1;
3115     buf_si_i    = buf_si + nrows+1;
3116     buf_si[0]   = nrows;
3117     buf_si_i[0] = 0;
3118     nrows = 0;
3119     for (i=owners[proc]; i<owners[proc+1]; i++){
3120       anzi = ai[i+1] - ai[i];
3121       if (anzi) {
3122         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3123         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3124         nrows++;
3125       }
3126     }
3127     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3128     k++;
3129     buf_si += len_si[proc];
3130   }
3131 
3132   ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
3133   ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);
3134 
3135   ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3136   for (i=0; i<merge->nrecv; i++){
3137     ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI:   recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3138   }
3139 
3140   ierr = PetscFree(len_si);CHKERRQ(ierr);
3141   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3142   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3143   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3144   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3145   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3146   ierr = PetscFree(status);CHKERRQ(ierr);
3147 
3148   /* compute a local seq matrix in each processor */
3149   /*----------------------------------------------*/
3150   /* allocate bi array and free space for accumulating nonzero column info */
3151   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3152   bi[0] = 0;
3153 
3154   /* create and initialize a linked list */
3155   nlnk = N+1;
3156   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3157 
3158   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3160   len  = ai[owners[rank+1]] - ai[owners[rank]];
3161   ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3162   current_space = free_space;
3163 
3164   /* determine symbolic info for each local row */
3165   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3166   nextrow = buf_ri_k + merge->nrecv;
3167   nextai  = nextrow + merge->nrecv;
3168   for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
3173   }
3174 
3175   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3176   len = 0;
3177   for (i=0;i<m;i++) {
3178     bnzi   = 0;
3179     /* add local non-zero cols of this proc's seqmat into lnk */
3180     arow   = owners[rank] + i;
3181     anzi   = ai[arow+1] - ai[arow];
3182     aj     = a->j + ai[arow];
3183     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3184     bnzi += nlnk;
3185     /* add received col data into lnk */
3186     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3187       if (i == *nextrow[k]) { /* i-th row */
3188         anzi = *(nextai[k]+1) - *nextai[k];
3189         aj   = buf_rj[k] + *nextai[k];
3190         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3191         bnzi += nlnk;
3192         nextrow[k]++; nextai[k]++;
3193       }
3194     }
3195     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3196 
3197     /* if free space is not available, make more free space */
3198     if (current_space->local_remaining<bnzi) {
3199       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3200       nspacedouble++;
3201     }
3202     /* copy data into free space, then initialize lnk */
3203     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3204     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3205 
3206     current_space->array           += bnzi;
3207     current_space->local_used      += bnzi;
3208     current_space->local_remaining -= bnzi;
3209 
3210     bi[i+1] = bi[i] + bnzi;
3211   }
3212 
3213   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3214 
3215   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3216   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3217   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3218 
3219   /* create symbolic parallel matrix B_mpi */
3220   /*---------------------------------------*/
3221   if (n==PETSC_DECIDE) {
3222     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr);
3223   } else {
3224     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
3225   }
3226   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3227   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3228   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3229 
3230   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3231   B_mpi->assembled     = PETSC_FALSE;
3232   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3233   merge->bi            = bi;
3234   merge->bj            = bj;
3235   merge->buf_ri        = buf_ri;
3236   merge->buf_rj        = buf_rj;
3237   merge->coi           = PETSC_NULL;
3238   merge->coj           = PETSC_NULL;
3239   merge->owners_co     = PETSC_NULL;
3240 
3241   /* attach the supporting struct to B_mpi for reuse */
3242   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3243   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3244   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3245   *mpimat = B_mpi;
3246   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3247   PetscFunctionReturn(0);
3248 }
3249 
static PetscEvent logkey_seqstompi = 0;
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3252 {
3253   PetscErrorCode   ierr;
3254 
3255   PetscFunctionBegin;
3256   if (!logkey_seqstompi) {
    ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
3258   }
3259   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3260   if (scall == MAT_INITIAL_MATRIX){
3261     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3262   }
3263   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3264   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3265   PetscFunctionReturn(0);
3266 }
3267 static PetscEvent logkey_getlocalmat = 0;
3268 #undef __FUNCT__
3269 #define __FUNCT__ "MatGetLocalMat"
3270 /*@C
     MatGetLocalMat - Creates a SeqAIJ matrix that contains all the local rows of the parallel matrix A
3272 
3273     Not Collective
3274 
3275    Input Parameters:
3276 +    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3278 
3279    Output Parameter:
3280 .    A_loc - the local sequential matrix generated
3281 
3282     Level: developer
3283 
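   Example usage (editorial sketch):
.vb
      Mat A_loc;
      ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      /* ... after the values of A change (same nonzero pattern) ... */
      ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
.ve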
3284 @*/
3285 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3286 {
3287   PetscErrorCode  ierr;
3288   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3289   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3290   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3291   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3292   PetscInt        am=A->m,i,j,k,cstart=mpimat->cstart;
3293   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3294 
3295   PetscFunctionBegin;
3296   if (!logkey_getlocalmat) {
    ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
3298   }
3299   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3300   if (scall == MAT_INITIAL_MATRIX){
3301     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3302     ci[0] = 0;
3303     for (i=0; i<am; i++){
3304       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3305     }
3306     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3307     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3308     k = 0;
3309     for (i=0; i<am; i++) {
3310       ncols_o = bi[i+1] - bi[i];
3311       ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A, columns left of the diagonal block */
3313       for (jo=0; jo<ncols_o; jo++) {
3314         col = cmap[*bj];
3315         if (col >= cstart) break;
3316         cj[k]   = col; bj++;
3317         ca[k++] = *ba++;
3318       }
3319       /* diagonal portion of A */
3320       for (j=0; j<ncols_d; j++) {
3321         cj[k]   = cstart + *aj++;
3322         ca[k++] = *aa++;
3323       }
      /* off-diagonal portion of A, columns right of the diagonal block */
3325       for (j=jo; j<ncols_o; j++) {
3326         cj[k]   = cmap[*bj++];
3327         ca[k++] = *ba++;
3328       }
3329     }
3330     /* put together the new matrix */
3331     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3332     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3333     /* Since these are PETSc arrays, change flags to free them as necessary. */
3334     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3335     mat->freedata = PETSC_TRUE;
3336     mat->nonew    = 0;
3337   } else if (scall == MAT_REUSE_MATRIX){
3338     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3339     ci = mat->i; cj = mat->j; ca = mat->a;
3340     for (i=0; i<am; i++) {
      /* off-diagonal portion of A, columns left of the diagonal block */
3342       ncols_o = bi[i+1] - bi[i];
3343       for (jo=0; jo<ncols_o; jo++) {
3344         col = cmap[*bj];
3345         if (col >= cstart) break;
3346         *ca++ = *ba++; bj++;
3347       }
3348       /* diagonal portion of A */
3349       ncols_d = ai[i+1] - ai[i];
3350       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
      /* off-diagonal portion of A, columns right of the diagonal block */
3352       for (j=jo; j<ncols_o; j++) {
3353         *ca++ = *ba++; bj++;
3354       }
3355     }
3356   } else {
3357     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3358   }
3359 
3360   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3361   PetscFunctionReturn(0);
3362 }
3363 
3364 static PetscEvent logkey_getlocalmatcondensed = 0;
3365 #undef __FUNCT__
3366 #define __FUNCT__ "MatGetLocalMatCondensed"
3367 /*@C
     MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all the local rows and the NON-ZERO columns of a parallel matrix
3369 
3370     Not Collective
3371 
3372    Input Parameters:
3373 +    A - the matrix
3374 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3375 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3376 
3377    Output Parameter:
3378 .    A_loc - the local sequential matrix generated
3379 
3380     Level: developer
3381 
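   Example usage (editorial sketch):
.vb
      Mat A_loc;
      /* take all local rows and all nonzero columns of A */
      ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
.ve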
3382 @*/
3383 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3384 {
3385   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3386   PetscErrorCode    ierr;
3387   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3388   IS                isrowa,iscola;
3389   Mat               *aloc;
3390 
3391   PetscFunctionBegin;
3392   if (!logkey_getlocalmatcondensed) {
    ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
3394   }
3395   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3396   if (!row){
3397     start = a->rstart; end = a->rend;
3398     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3399   } else {
3400     isrowa = *row;
3401   }
3402   if (!col){
3403     start = a->cstart;
3404     cmap  = a->garray;
3405     nzA   = a->A->n;
3406     nzB   = a->B->n;
3407     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3408     ncols = 0;
3409     for (i=0; i<nzB; i++) {
3410       if (cmap[i] < start) idx[ncols++] = cmap[i];
3411       else break;
3412     }
3413     imark = i;
3414     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3415     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3416     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3417     ierr = PetscFree(idx);CHKERRQ(ierr);
3418   } else {
3419     iscola = *col;
3420   }
3421   if (scall != MAT_INITIAL_MATRIX){
3422     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3423     aloc[0] = *A_loc;
3424   }
3425   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3426   *A_loc = aloc[0];
3427   ierr = PetscFree(aloc);CHKERRQ(ierr);
3428   if (!row){
3429     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3430   }
3431   if (!col){
3432     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3433   }
3434   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3435   PetscFunctionReturn(0);
3436 }
3437 
3438 static PetscEvent logkey_GetBrowsOfAcols = 0;
3439 #undef __FUNCT__
3440 #define __FUNCT__ "MatGetBrowsOfAcols"
3441 /*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
3443 
3444     Collective on Mat
3445 
3446    Input Parameters:
3447 +    A,B - the matrices in mpiaij format
3448 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3449 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3450 
3451    Output Parameter:
3452 +    rowb, colb - index sets of rows and columns of B to extract
.    brstart - row index of B_seq at which B's own local rows begin
3454 -    B_seq - the sequential matrix generated
3455 
3456     Level: developer
3457 
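   Example usage (editorial sketch; the index sets are created on the first call and reused after):
.vb
      Mat      B_seq;
      IS       rowb,colb;
      PetscInt brstart;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      /* ... after the values of B change (same nonzero pattern) ... */
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      ierr = ISDestroy(rowb);CHKERRQ(ierr);
      ierr = ISDestroy(colb);CHKERRQ(ierr);
.ve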
3458 @*/
3459 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3460 {
3461   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3462   PetscErrorCode    ierr;
3463   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3464   IS                isrowb,iscolb;
3465   Mat               *bseq;
3466 
3467   PetscFunctionBegin;
3468   if (a->cstart != b->rstart || a->cend != b->rend){
3469     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3470   }
3471   if (!logkey_GetBrowsOfAcols) {
    ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
3473   }
3474   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3475 
3476   if (scall == MAT_INITIAL_MATRIX){
3477     start = a->cstart;
3478     cmap  = a->garray;
3479     nzA   = a->A->n;
3480     nzB   = a->B->n;
3481     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3482     ncols = 0;
3483     for (i=0; i<nzB; i++) {  /* row < local row index */
3484       if (cmap[i] < start) idx[ncols++] = cmap[i];
3485       else break;
3486     }
3487     imark = i;
3488     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3489     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3490     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3491     ierr = PetscFree(idx);CHKERRQ(ierr);
3492     *brstart = imark;
3493     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3494   } else {
3495     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3496     isrowb = *rowb; iscolb = *colb;
3497     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3498     bseq[0] = *B_seq;
3499   }
3500   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3501   *B_seq = bseq[0];
3502   ierr = PetscFree(bseq);CHKERRQ(ierr);
3503   if (!rowb){
3504     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3505   } else {
3506     *rowb = isrowb;
3507   }
3508   if (!colb){
3509     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3510   } else {
3511     *colb = iscolb;
3512   }
3513   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3514   PetscFunctionReturn(0);
3515 }
3516 
3517 static PetscEvent logkey_GetBrowsOfAocols = 0;
3518 #undef __FUNCT__
3519 #define __FUNCT__ "MatGetBrowsOfAoCols"
3520 /*@C
    MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to
    the nonzero columns of the OFF-DIAGONAL portion of the local part of A
3523 
3524     Collective on Mat
3525 
3526    Input Parameters:
3527 +    A,B - the matrices in mpiaij format
3528 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3529 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3530 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3531 
3532    Output Parameter:
.    B_oth - the sequential matrix generated
3534 
3535     Level: developer
3536 
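   Example usage (editorial sketch; startsj and bufa are carried between calls to enable reuse):
.vb
      Mat         B_oth;
      PetscInt    *startsj = PETSC_NULL;
      PetscScalar *bufa    = PETSC_NULL;
      ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
      /* ... after the values of B change (same nonzero pattern) ... */
      ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
.ve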
3537 @*/
3538 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3539 {
3540   VecScatter_MPI_General *gen_to,*gen_from;
3541   PetscErrorCode         ierr;
3542   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3543   Mat_SeqAIJ             *b_oth;
3544   VecScatter             ctx=a->Mvctx;
3545   MPI_Comm               comm=ctx->comm;
3546   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3547   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
3548   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3549   PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3550   MPI_Request            *rwaits,*swaits;
3551   MPI_Status             *sstatus,rstatus;
3552   PetscInt               *cols;
3553   PetscScalar            *vals;
3554   PetscMPIInt            j;
3555 
3556   PetscFunctionBegin;
3557   if (a->cstart != b->rstart || a->cend != b->rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3559   }
3560   if (!logkey_GetBrowsOfAocols) {
    ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
3562   }
3563   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3564   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3565 
3566   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3567   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* holds the lengths of the rows being received */
  svalues  = gen_to->values;   /* holds the lengths of the rows being sent */
3570   nrecvs   = gen_from->n;
3571   nsends   = gen_to->n;
3572   rwaits   = gen_from->requests;
3573   swaits   = gen_to->requests;
3574   srow     = gen_to->indices;   /* local row index to be sent */
3575   rstarts  = gen_from->starts;
3576   sstarts  = gen_to->starts;
3577   rprocs   = gen_from->procs;
3578   sprocs   = gen_to->procs;
3579   sstatus  = gen_to->sstatus;
3580 
3581   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3582   if (scall == MAT_INITIAL_MATRIX){
3583     /* i-array */
3584     /*---------*/
3585     /*  post receives */
3586     for (i=0; i<nrecvs; i++){
3587       rowlen = (PetscInt*)rvalues + rstarts[i];
3588       nrows = rstarts[i+1]-rstarts[i];
3589       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3590     }
3591 
3592     /* pack the outgoing message */
3593     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3594     rstartsj = sstartsj + nsends +1;
3595     sstartsj[0] = 0;  rstartsj[0] = 0;
3596     len = 0; /* total length of j or a array to be sent */
3597     k = 0;
3598     for (i=0; i<nsends; i++){
3599       rowlen = (PetscInt*)svalues + sstarts[i];
3600       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3601       for (j=0; j<nrows; j++) {
3602         row = srow[k] + b->rowners[rank]; /* global row idx */
3603         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3604         len += rowlen[j];
3605         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3606         k++;
3607       }
3608       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
      sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3610     }
3611     /* recvs and sends of i-array are completed */
3612     i = nrecvs;
3613     while (i--) {
3614       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3615     }
3616     if (nsends) {
3617       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3618     }
3619     /* allocate buffers for sending j and a arrays */
3620     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3621     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3622 
3623     /* create i-array of B_oth */
3624     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3625     b_othi[0] = 0;
3626     len = 0; /* total length of j or a array to be received */
3627     k = 0;
3628     for (i=0; i<nrecvs; i++){
3629       rowlen = (PetscInt*)rvalues + rstarts[i];
3630       nrows = rstarts[i+1]-rstarts[i];
3631       for (j=0; j<nrows; j++) {
3632         b_othi[k+1] = b_othi[k] + rowlen[j];
3633         len += rowlen[j]; k++;
3634       }
3635       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3636     }
3637 
    /* allocate space for j and a arrays of B_oth */
3639     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3640     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3641 
3642     /* j-array */
3643     /*---------*/
3644     /*  post receives of j-array */
3645     for (i=0; i<nrecvs; i++){
3646       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3647       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3648     }
3649     k = 0;
3650     for (i=0; i<nsends; i++){
3651       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3652       bufJ = bufj+sstartsj[i];
3653       for (j=0; j<nrows; j++) {
3654         row  = srow[k++] + b->rowners[rank]; /* global row idx */
3655         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3656         for (l=0; l<ncols; l++){
3657           *bufJ++ = cols[l];
3658         }
3659         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3660       }
3661       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3662     }
3663 
3664     /* recvs and sends of j-array are completed */
3665     i = nrecvs;
3666     while (i--) {
3667       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3668     }
3669     if (nsends) {
3670       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3671     }
3672   } else if (scall == MAT_REUSE_MATRIX){
3673     sstartsj = *startsj;
3674     rstartsj = sstartsj + nsends +1;
3675     bufa     = *bufa_ptr;
3676     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3677     b_otha   = b_oth->a;
3678   } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3680   }
3681 
3682   /* a-array */
3683   /*---------*/
3684   /*  post receives of a-array */
3685   for (i=0; i<nrecvs; i++){
3686     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3687     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3688   }
3689   k = 0;
3690   for (i=0; i<nsends; i++){
3691     nrows = sstarts[i+1]-sstarts[i];
3692     bufA = bufa+sstartsj[i];
3693     for (j=0; j<nrows; j++) {
3694       row  = srow[k++] + b->rowners[rank]; /* global row idx */
3695       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3696       for (l=0; l<ncols; l++){
3697         *bufA++ = vals[l];
3698       }
3699       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3700 
3701     }
3702     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3703   }
3704   /* recvs and sends of a-array are completed */
3705   i = nrecvs;
3706   while (i--) {
3707     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3708   }
  if (nsends) {
3710     ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3711   }
3712 
3713   if (scall == MAT_INITIAL_MATRIX){
3714     /* put together the new matrix */
3715     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3716 
3717     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3718     /* Since these are PETSc arrays, change flags to free them as necessary. */
3719     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3720     b_oth->freedata = PETSC_TRUE;
3721     b_oth->nonew    = 0;
3722 
3723     ierr = PetscFree(bufj);CHKERRQ(ierr);
3724     if (!startsj || !bufa_ptr){
3725       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr); /* free the send buffer itself, not the caller's pointer */
3727     } else {
3728       *startsj  = sstartsj;
3729       *bufa_ptr = bufa;
3730     }
3731   }
3732   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3733 
3734   PetscFunctionReturn(0);
3735 }
3736 
3737 /*MC
3738    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3739 
3740    Options Database Keys:
3741 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3742 
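  Example (editorial sketch):
.vb
     ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     /* or select the type at runtime with the option -mat_type mpiaij */
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
.ve
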
3743   Level: beginner
3744 
3745 .seealso: MatCreateMPIAIJ
3746 M*/
3747 
3748 EXTERN_C_BEGIN
3749 #undef __FUNCT__
3750 #define __FUNCT__ "MatCreate_MPIAIJ"
3751 PetscErrorCode MatCreate_MPIAIJ(Mat B)
3752 {
3753   Mat_MPIAIJ     *b;
3754   PetscErrorCode ierr;
3755   PetscInt       i;
3756   PetscMPIInt    size;
3757 
3758   PetscFunctionBegin;
3759   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3760 
3761   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3762   B->data         = (void*)b;
3763   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3764   B->factor       = 0;
3765   B->bs           = 1;
3766   B->assembled    = PETSC_FALSE;
3767   B->mapping      = 0;
3768 
3769   B->insertmode      = NOT_SET_VALUES;
3770   b->size            = size;
3771   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3772 
3773   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
3774   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
3775 
3776   /* the information in the maps duplicates the information computed below, eventually
3777      we should remove the duplicate information that is not contained in the maps */
3778   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
3779   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
3780 
3781   /* build local table of row and column ownerships */
3782   ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
3783   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
3784   b->cowners = b->rowners + b->size + 2;
3785   ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3786   b->rowners[0] = 0;
3787   for (i=2; i<=b->size; i++) {
3788     b->rowners[i] += b->rowners[i-1];
3789   }
3790   b->rstart = b->rowners[b->rank];
3791   b->rend   = b->rowners[b->rank+1];
3792   ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3793   b->cowners[0] = 0;
3794   for (i=2; i<=b->size; i++) {
3795     b->cowners[i] += b->cowners[i-1];
3796   }
3797   b->cstart = b->cowners[b->rank];
3798   b->cend   = b->cowners[b->rank+1];
3799 
3800   /* build cache for off array entries formed */
3801   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3802   b->donotstash  = PETSC_FALSE;
3803   b->colmap      = 0;
3804   b->garray      = 0;
3805   b->roworiented = PETSC_TRUE;
3806 
3807   /* stuff used for matrix vector multiply */
3808   b->lvec      = PETSC_NULL;
3809   b->Mvctx     = PETSC_NULL;
3810 
3811   /* stuff for MatGetRow() */
3812   b->rowindices   = 0;
3813   b->rowvalues    = 0;
3814   b->getrowactive = PETSC_FALSE;
3815 
3816   /* Explicitly create 2 MATSEQAIJ matrices. */
3817   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
3818   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3819   PetscLogObjectParent(B,b->A);
3820   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
3821   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3822   PetscLogObjectParent(B,b->B);
3823 
3824   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3825                                      "MatStoreValues_MPIAIJ",
3826                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3827   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3828                                      "MatRetrieveValues_MPIAIJ",
3829                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3830   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3831 				     "MatGetDiagonalBlock_MPIAIJ",
3832                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3833   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3834 				     "MatIsTranspose_MPIAIJ",
3835 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3836   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3837 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3838 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3839   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3840 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3841 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
3842   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3843 				     "MatDiagonalScaleLocal_MPIAIJ",
3844 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
3845   PetscFunctionReturn(0);
3846 }
3847 EXTERN_C_END
3848