/* xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision d44834fb5aa6ab55eaa2e18f09dfd00e40aa4639) */
1 
2 #include "src/mat/impls/aij/mpi/mpiaij.h"
3 #include "src/inline/spops.h"
4 
5 /*
6   Local utility routine that creates a mapping from the global column
7 number to the local number in the off-diagonal part of the local
8 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable,
9 at a slightly higher hash-table lookup cost; without it, it is not scalable
10 (each processor has an order-N integer array), but access is fast.
11 */
12 #undef __FUNCT__
13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
15 {
16   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
17   PetscErrorCode ierr;
18   PetscInt       n = aij->B->n,i;
19 
20   PetscFunctionBegin;
21 #if defined (PETSC_USE_CTABLE)
22   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
23   for (i=0; i<n; i++){
24     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
25   }
26 #else
27   ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
28   PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));
29   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
30   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
31 #endif
32   PetscFunctionReturn(0);
33 }
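
/*
   Illustrative sketch: once the colmap exists, a global column number gcol is
   translated to a local index in the off-diagonal block the same way the
   routines below do it (the map stores local+1 so that 0 can mean "absent"):

     #if defined(PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
       lcol--;
     #else
       lcol = aij->colmap[gcol] - 1;
     #endif

   A negative lcol means gcol does not (yet) appear in the off-diagonal block B.
*/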
34 
35 #define CHUNKSIZE   15
36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
37 { \
38  \
39     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
40     rmax = aimax[row]; nrow = ailen[row];  \
41     col1 = col - shift; \
42      \
43     low = 0; high = nrow; \
44     while (high-low > 5) { \
45       t = (low+high)/2; \
46       if (rp[t] > col) high = t; \
47       else             low  = t; \
48     } \
49       for (_i=low; _i<high; _i++) { \
50         if (rp[_i] > col1) break; \
51         if (rp[_i] == col1) { \
52           if (addv == ADD_VALUES) ap[_i] += value;   \
53           else                  ap[_i] = value; \
54           goto a_noinsert; \
55         } \
56       }  \
57       if (nonew == 1) goto a_noinsert; \
58       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
59       if (nrow >= rmax) { \
60         /* there is no extra room in row, therefore enlarge */ \
61         PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
62         PetscScalar *new_a; \
63  \
64         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
65  \
66         /* malloc new storage space */ \
67         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
68         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
69         new_j   = (PetscInt*)(new_a + new_nz); \
70         new_i   = new_j + new_nz; \
71  \
72         /* copy over old data into new slots */ \
73         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
74         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
75         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
76         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
77         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
78                                                            len*sizeof(PetscInt));CHKERRQ(ierr); \
79         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
80         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
81                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
82         /* free up old matrix storage */ \
83  \
84         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
85         if (!a->singlemalloc) { \
86            ierr = PetscFree(a->i);CHKERRQ(ierr); \
87            ierr = PetscFree(a->j);CHKERRQ(ierr); \
88         } \
89         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
90         a->singlemalloc = PETSC_TRUE; \
91  \
92         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
93         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
94         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
95         a->maxnz += CHUNKSIZE; \
96         a->reallocs++; \
97       } \
98       N = nrow++ - 1; a->nz++; \
99       /* shift up all the later entries in this row */ \
100       for (ii=N; ii>=_i; ii--) { \
101         rp[ii+1] = rp[ii]; \
102         ap[ii+1] = ap[ii]; \
103       } \
104       rp[_i] = col1;  \
105       ap[_i] = value;  \
106       a_noinsert: ; \
107       ailen[row] = nrow; \
108 }
109 
110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
111 { \
112  \
113     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
114     rmax = bimax[row]; nrow = bilen[row];  \
115     col1 = col - shift; \
116      \
117     low = 0; high = nrow; \
118     while (high-low > 5) { \
119       t = (low+high)/2; \
120       if (rp[t] > col) high = t; \
121       else             low  = t; \
122     } \
123        for (_i=low; _i<high; _i++) { \
124         if (rp[_i] > col1) break; \
125         if (rp[_i] == col1) { \
126           if (addv == ADD_VALUES) ap[_i] += value;   \
127           else                  ap[_i] = value; \
128           goto b_noinsert; \
129         } \
130       }  \
131       if (nonew == 1) goto b_noinsert; \
132       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
133       if (nrow >= rmax) { \
134         /* there is no extra room in row, therefore enlarge */ \
135         PetscInt    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
136         PetscScalar *new_a; \
137  \
138         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
139  \
140         /* malloc new storage space */ \
141         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \
142         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
143         new_j   = (PetscInt*)(new_a + new_nz); \
144         new_i   = new_j + new_nz; \
145  \
146         /* copy over old data into new slots */ \
147         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
148         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
149         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
150         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
151         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
152                                                            len*sizeof(PetscInt));CHKERRQ(ierr); \
153         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
154         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
155                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
156         /* free up old matrix storage */ \
157  \
158         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
159         if (!b->singlemalloc) { \
160           ierr = PetscFree(b->i);CHKERRQ(ierr); \
161           ierr = PetscFree(b->j);CHKERRQ(ierr); \
162         } \
163         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
164         b->singlemalloc = PETSC_TRUE; \
165  \
166         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
167         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
168         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
169         b->maxnz += CHUNKSIZE; \
170         b->reallocs++; \
171       } \
172       N = nrow++ - 1; b->nz++; \
173       /* shift up all the later entries in this row */ \
174       for (ii=N; ii>=_i; ii--) { \
175         rp[ii+1] = rp[ii]; \
176         ap[ii+1] = ap[ii]; \
177       } \
178       rp[_i] = col1;  \
179       ap[_i] = value;  \
180       b_noinsert: ; \
181       bilen[row] = nrow; \
182 }
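
/*
   Note on the two macros above: both implement the same in-place CSR insertion.
   The row's column indices rp[] are searched for col1 (a short binary search
   followed by a linear scan); an existing entry is added to or overwritten
   according to addv; otherwise, if new nonzeros are allowed, the trailing
   entries of the row are shifted up and the new (col1,value) pair is inserted.
   When the row is full (nrow >= rmax) the entire i/j/a storage is reallocated
   with CHUNKSIZE extra slots for this row and reallocs is incremented, so the
   extra mallocs show up in MatGetInfo().
*/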
183 
184 #undef __FUNCT__
185 #define __FUNCT__ "MatSetValues_MPIAIJ"
186 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
187 {
188   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
189   PetscScalar    value;
190   PetscErrorCode ierr;
191   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
192   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
193   PetscTruth     roworiented = aij->roworiented;
194 
195   /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros above */
196   Mat            A = aij->A;
197   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
198   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
199   PetscScalar    *aa = a->a;
200   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
201   Mat            B = aij->B;
202   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
203   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
204   PetscScalar    *ba = b->a;
205 
206   PetscInt       *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
207   PetscInt       nonew = a->nonew,shift=0;
208   PetscScalar    *ap;
209 
210   PetscFunctionBegin;
211   for (i=0; i<m; i++) {
212     if (im[i] < 0) continue;
213 #if defined(PETSC_USE_BOPT_g)
214     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
215 #endif
216     if (im[i] >= rstart && im[i] < rend) {
217       row = im[i] - rstart;
218       for (j=0; j<n; j++) {
219         if (in[j] >= cstart && in[j] < cend){
220           col = in[j] - cstart;
221           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
222           if (ignorezeroentries && value == 0.0) continue;
223           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
224           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
225         } else if (in[j] < 0) continue;
226 #if defined(PETSC_USE_BOPT_g)
227         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
228 #endif
229         else {
230           if (mat->was_assembled) {
231             if (!aij->colmap) {
232               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
233             }
234 #if defined (PETSC_USE_CTABLE)
235             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
236 	    col--;
237 #else
238             col = aij->colmap[in[j]] - 1;
239 #endif
240             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
241               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
242               col =  in[j];
243               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
244               B = aij->B;
245               b = (Mat_SeqAIJ*)B->data;
246               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
247               ba = b->a;
248             }
249           } else col = in[j];
250           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
251           if (ignorezeroentries && value == 0.0) continue;
252           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
253           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
254         }
255       }
256     } else {
257       if (!aij->donotstash) {
258         if (roworiented) {
259           if (ignorezeroentries && v[i*n] == 0.0) continue;
260           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
261         } else {
262           if (ignorezeroentries && v[i] == 0.0) continue;
263           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
264         }
265       }
266     }
267   }
268   PetscFunctionReturn(0);
269 }
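
/*
   Minimal usage sketch (placeholders for the actual indices and values): a
   process normally inserts entries for its own rows and then assembles; rows
   it does not own are stashed here and routed during assembly unless
   MAT_IGNORE_OFF_PROC_ENTRIES was set.

     PetscInt    row,col;
     PetscScalar v = 1.0;
     row = ...; col = ...;
     ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Columns in [cstart,cend) go into the diagonal block aij->A; all other columns
   go into the off-diagonal block aij->B, translated through the colmap.
*/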
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatGetValues_MPIAIJ"
273 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
274 {
275   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
276   PetscErrorCode ierr;
277   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
278   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
279 
280   PetscFunctionBegin;
281   for (i=0; i<m; i++) {
282     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
283     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
284     if (idxm[i] >= rstart && idxm[i] < rend) {
285       row = idxm[i] - rstart;
286       for (j=0; j<n; j++) {
287         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
288         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
289         if (idxn[j] >= cstart && idxn[j] < cend){
290           col = idxn[j] - cstart;
291           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
292         } else {
293           if (!aij->colmap) {
294             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
295           }
296 #if defined (PETSC_USE_CTABLE)
297           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
298           col --;
299 #else
300           col = aij->colmap[idxn[j]] - 1;
301 #endif
302           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
303           else {
304             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
305           }
306         }
307       }
308     } else {
309       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
310     }
311   }
312   PetscFunctionReturn(0);
313 }
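
/*
   Usage note: unlike MatSetValues(), only locally owned rows may be queried.
   A minimal sketch, assuming the caller first obtains its ownership range:

     PetscInt    rstart,rend,row,col = 0;
     PetscScalar v;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     row  = rstart;
     ierr = MatGetValues(mat,1,&row,1,&col,&v);CHKERRQ(ierr);

   A column that is not stored locally simply returns 0.0, as the
   (col < 0 || garray[col] != idxn[j]) test above shows.
*/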
314 
315 #undef __FUNCT__
316 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
317 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
318 {
319   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
320   PetscErrorCode ierr;
321   PetscInt       nstash,reallocs;
322   InsertMode     addv;
323 
324   PetscFunctionBegin;
325   if (aij->donotstash) {
326     PetscFunctionReturn(0);
327   }
328 
329   /* make sure all processors are using the same insert mode (INSERT_VALUES or ADD_VALUES) */
330   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
331   if (addv == (ADD_VALUES|INSERT_VALUES)) {
332     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
333   }
334   mat->insertmode = addv; /* in case this processor had no cache */
335 
336   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
337   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
338   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
339   PetscFunctionReturn(0);
340 }
341 
342 
343 #undef __FUNCT__
344 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
345 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
346 {
347   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
348   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
349   PetscErrorCode ierr;
350   PetscMPIInt    n;
351   PetscInt       i,j,rstart,ncols,flg;
352   PetscInt       *row,*col,other_disassembled;
353   PetscScalar    *val;
354   InsertMode     addv = mat->insertmode;
355 
356   PetscFunctionBegin;
357   if (!aij->donotstash) {
358     while (1) {
359       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
360       if (!flg) break;
361 
362       for (i=0; i<n;) {
363         /* Now identify the consecutive vals belonging to the same row */
364         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
365         if (j < n) ncols = j-i;
366         else       ncols = n-i;
367         /* Now assemble all these values with a single function call */
368         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
369         i = j;
370       }
371     }
372     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
373   }
374 
375   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
376   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
377 
378   /* determine if any processor has disassembled; if so we must
379      also disassemble ourselves, in order that we may reassemble. */
380   /*
381      if the nonzero structure of the submatrix B cannot change, then we know
382      that no processor disassembled, so we can skip this step
383   */
384   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
385     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
386     if (mat->was_assembled && !other_disassembled) {
387       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
388       /* reaccess the b because aij->B was changed */
389       b    = (Mat_SeqAIJ *)aij->B->data;
390     }
391   }
392 
393   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
394     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
395   }
396   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
397   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
398 
399   if (aij->rowvalues) {
400     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
401     aij->rowvalues = 0;
402   }
403 
404   /* used by MatAXPY() */
405   a->xtoy = 0; b->xtoy = 0;
406   a->XtoY = 0; b->XtoY = 0;
407 
408   PetscFunctionReturn(0);
409 }
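
/*
   Summary of the assembly pipeline above: (1) drain the stash, turning each run
   of same-row entries into a single MatSetValues_MPIAIJ() call; (2) assemble the
   diagonal block aij->A; (3) if new nonzeros are allowed in B, agree globally
   (the MPI_Allreduce over was_assembled) on whether any process disassembled and,
   if so, disassemble locally too; (4) on the first final assembly build the
   ghost-column scatter with MatSetUpMultiply_MPIAIJ(); (5) assemble the
   off-diagonal block aij->B.
*/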
410 
411 #undef __FUNCT__
412 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
413 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
414 {
415   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
416   PetscErrorCode ierr;
417 
418   PetscFunctionBegin;
419   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
420   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
421   PetscFunctionReturn(0);
422 }
423 
424 #undef __FUNCT__
425 #define __FUNCT__ "MatZeroRows_MPIAIJ"
426 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
427 {
428   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
429   PetscErrorCode ierr;
430   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag;
431   PetscInt       i,N,*rows,*owners = l->rowners;
432   PetscInt       *nprocs,j,idx,nsends,row;
433   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
434   PetscInt       *rvalues,count,base,slen,*source;
435   PetscInt       *lens,*lrows,*values,rstart=l->rstart;
436   MPI_Comm       comm = A->comm;
437   MPI_Request    *send_waits,*recv_waits;
438   MPI_Status     recv_status,*send_status;
439   IS             istmp;
440   PetscTruth     found;
441 
442   PetscFunctionBegin;
443   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
444   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
445 
446   /*  first count number of contributors to each processor */
447   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
448   ierr   = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
449   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
450   for (i=0; i<N; i++) {
451     idx = rows[i];
452     found = PETSC_FALSE;
453     for (j=0; j<size; j++) {
454       if (idx >= owners[j] && idx < owners[j+1]) {
455         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
456       }
457     }
458     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
459   }
460   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
461 
462   /* inform other processors of number of messages and max length*/
463   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
464 
465   /* post receives:   */
466   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
467   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
468   for (i=0; i<nrecvs; i++) {
469     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
470   }
471 
472   /* do sends:
473       1) starts[i] gives the starting index in svalues for the rows going to
474          the ith processor
475   */
476   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
477   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
478   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
479   starts[0] = 0;
480   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
481   for (i=0; i<N; i++) {
482     svalues[starts[owner[i]]++] = rows[i];
483   }
484   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
485 
486   starts[0] = 0;
487   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
488   count = 0;
489   for (i=0; i<size; i++) {
490     if (nprocs[2*i+1]) {
491       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
492     }
493   }
494   ierr = PetscFree(starts);CHKERRQ(ierr);
495 
496   base = owners[rank];
497 
498   /*  wait on receives */
499   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
500   source = lens + nrecvs;
501   count  = nrecvs; slen = 0;
502   while (count) {
503     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
504     /* unpack receives into our local space */
505     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
506     source[imdex]  = recv_status.MPI_SOURCE;
507     lens[imdex]    = n;
508     slen          += n;
509     count--;
510   }
511   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
512 
513   /* convert the received global row numbers into a list of local row indices */
514   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
515   count = 0;
516   for (i=0; i<nrecvs; i++) {
517     values = rvalues + i*nmax;
518     for (j=0; j<lens[i]; j++) {
519       lrows[count++] = values[j] - base;
520     }
521   }
522   ierr = PetscFree(rvalues);CHKERRQ(ierr);
523   ierr = PetscFree(lens);CHKERRQ(ierr);
524   ierr = PetscFree(owner);CHKERRQ(ierr);
525   ierr = PetscFree(nprocs);CHKERRQ(ierr);
526 
527   /* actually zap the local rows */
528   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
529   PetscLogObjectParent(A,istmp);
530 
531   /*
532         Zero the required rows. If the "diagonal block" of the matrix
533      is square and the user wishes to set the diagonal, we use separate
534      code so that MatSetValues() is not called for each diagonal entry,
535      which would allocate new memory and slow things down with many mallocs.
536 
537        Contributed by: Matthew Knepley
538   */
539   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
540   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
541   if (diag && (l->A->M == l->A->N)) {
542     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
543   } else if (diag) {
544     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
545     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
546       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
547 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
548     }
549     for (i = 0; i < slen; i++) {
550       row  = lrows[i] + rstart;
551       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
552     }
553     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
554     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
555   } else {
556     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
557   }
558   ierr = ISDestroy(istmp);CHKERRQ(ierr);
559   ierr = PetscFree(lrows);CHKERRQ(ierr);
560 
561   /* wait on sends */
562   if (nsends) {
563     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
564     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
565     ierr = PetscFree(send_status);CHKERRQ(ierr);
566   }
567   ierr = PetscFree(send_waits);CHKERRQ(ierr);
568   ierr = PetscFree(svalues);CHKERRQ(ierr);
569 
570   PetscFunctionReturn(0);
571 }
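
/*
   Usage sketch with the calling convention used in this file (diag passed as a
   pointer, 0 meaning no diagonal is set); the row numbers are hypothetical and
   each process may list rows owned by any process:

     IS          is;
     PetscInt    rows[3] = {0,5,7};
     PetscScalar one     = 1.0;
     ierr = ISCreateGeneral(PETSC_COMM_SELF,3,rows,&is);CHKERRQ(ierr);
     ierr = MatZeroRows(A,is,&one);CHKERRQ(ierr);
     ierr = ISDestroy(is);CHKERRQ(ierr);
*/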
572 
573 #undef __FUNCT__
574 #define __FUNCT__ "MatMult_MPIAIJ"
575 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
576 {
577   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
578   PetscErrorCode ierr;
579   PetscInt       nt;
580 
581   PetscFunctionBegin;
582   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
583   if (nt != A->n) {
584     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
585   }
586   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
587   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
588   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
589   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
590   PetscFunctionReturn(0);
591 }
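
/*
   Note: the scatter of the ghost values of xx into a->lvec is started before,
   and completed after, the purely local product with the diagonal block, so
   communication overlaps computation:

     yy  = A_diag * xx(local)        while the ghost values are in flight
     yy += B_off  * a->lvec          after VecScatterEnd()
*/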
592 
593 #undef __FUNCT__
594 #define __FUNCT__ "MatMultAdd_MPIAIJ"
595 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
596 {
597   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
598   PetscErrorCode ierr;
599 
600   PetscFunctionBegin;
601   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
602   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
603   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
604   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
605   PetscFunctionReturn(0);
606 }
607 
608 #undef __FUNCT__
609 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
610 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
611 {
612   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
613   PetscErrorCode ierr;
614 
615   PetscFunctionBegin;
616   /* do nondiagonal part */
617   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
618   /* send it on its way */
619   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
620   /* do local part */
621   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
622   /* receive remote parts: note this assumes the values are not actually */
623   /* inserted into yy until the next line, which is true for this implementation */
624   /* but may not always hold. */
625   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
626   PetscFunctionReturn(0);
627 }
628 
629 EXTERN_C_BEGIN
630 #undef __FUNCT__
631 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
632 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
633 {
634   MPI_Comm       comm;
635   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
636   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
637   IS             Me,Notme;
638   PetscErrorCode ierr;
639   PetscInt       M,N,first,last,*notme,i;
640   PetscMPIInt    size;
641 
642   PetscFunctionBegin;
643 
644   /* Easy test: symmetric diagonal block */
645   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
646   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
647   if (!*f) PetscFunctionReturn(0);
648   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
649   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
650   if (size == 1) PetscFunctionReturn(0);
651 
652   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
653   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
654   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
655   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
656   for (i=0; i<first; i++) notme[i] = i;
657   for (i=last; i<M; i++) notme[i-last+first] = i;
658   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
659   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
660   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
661   Aoff = Aoffs[0];
662   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
663   Boff = Boffs[0];
664   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
665   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
666   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
667   ierr = ISDestroy(Me);CHKERRQ(ierr);
668   ierr = ISDestroy(Notme);CHKERRQ(ierr);
669 
670   PetscFunctionReturn(0);
671 }
672 EXTERN_C_END
673 
674 #undef __FUNCT__
675 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
676 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
677 {
678   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
679   PetscErrorCode ierr;
680 
681   PetscFunctionBegin;
682   /* do nondiagonal part */
683   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
684   /* send it on its way */
685   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
686   /* do local part */
687   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
688   /* receive remote parts: note this assumes the values are not actually */
689   /* inserted into zz until the next line, which is true for this implementation */
690   /* but may not always hold. */
691   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
692   PetscFunctionReturn(0);
693 }
694 
695 /*
696   This only works correctly for square matrices where the subblock A->A is the
697    diagonal block
698 */
699 #undef __FUNCT__
700 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
701 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
702 {
703   PetscErrorCode ierr;
704   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
705 
706   PetscFunctionBegin;
707   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
708   if (a->rstart != a->cstart || a->rend != a->cend) {
709     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
710   }
711   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
712   PetscFunctionReturn(0);
713 }
714 
715 #undef __FUNCT__
716 #define __FUNCT__ "MatScale_MPIAIJ"
717 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
718 {
719   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
720   PetscErrorCode ierr;
721 
722   PetscFunctionBegin;
723   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
724   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
725   PetscFunctionReturn(0);
726 }
727 
728 #undef __FUNCT__
729 #define __FUNCT__ "MatDestroy_MPIAIJ"
730 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
731 {
732   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
733   PetscErrorCode ierr;
734 
735   PetscFunctionBegin;
736 #if defined(PETSC_USE_LOG)
737   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
738 #endif
739   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
740   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
741   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
742   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
743 #if defined (PETSC_USE_CTABLE)
744   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
745 #else
746   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
747 #endif
748   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
749   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
750   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
751   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
752   ierr = PetscFree(aij);CHKERRQ(ierr);
753 
754   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
755   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
756   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
757   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
758   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
759   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
760   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
761   PetscFunctionReturn(0);
762 }
763 
764 #undef __FUNCT__
765 #define __FUNCT__ "MatView_MPIAIJ_Binary"
766 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
767 {
768   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
769   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
770   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
771   PetscErrorCode    ierr;
772   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
773   int               fd;
774   PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
775   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
776   PetscScalar       *column_values;
777 
778   PetscFunctionBegin;
779   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
780   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
781   nz   = A->nz + B->nz;
782   if (!rank) {
783     header[0] = MAT_FILE_COOKIE;
784     header[1] = mat->M;
785     header[2] = mat->N;
786     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
787     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
788     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
789     /* get largest number of rows any processor has */
790     rlen = mat->m;
791     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
792     for (i=1; i<size; i++) {
793       rlen = PetscMax(rlen,range[i+1] - range[i]);
794     }
795   } else {
796     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
797     rlen = mat->m;
798   }
799 
800   /* load up the local row counts */
801   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
802   for (i=0; i<mat->m; i++) {
803     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
804   }
805 
806   /* store the row lengths to the file */
807   if (!rank) {
808     MPI_Status status;
809     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
810     for (i=1; i<size; i++) {
811       rlen = range[i+1] - range[i];
812       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
813       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
814     }
815   } else {
816     ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
817   }
818   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
819 
820   /* load up the local column indices */
821   nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
822   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
823   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
824   cnt  = 0;
825   for (i=0; i<mat->m; i++) {
826     for (j=B->i[i]; j<B->i[i+1]; j++) {
827       if ( (col = garray[B->j[j]]) > cstart) break;
828       column_indices[cnt++] = col;
829     }
830     for (k=A->i[i]; k<A->i[i+1]; k++) {
831       column_indices[cnt++] = A->j[k] + cstart;
832     }
833     for (; j<B->i[i+1]; j++) {
834       column_indices[cnt++] = garray[B->j[j]];
835     }
836   }
837   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
838 
839   /* store the column indices to the file */
840   if (!rank) {
841     MPI_Status status;
842     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
843     for (i=1; i<size; i++) {
844       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
845       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
846       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
847       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
848     }
849   } else {
850     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
851     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
852   }
853   ierr = PetscFree(column_indices);CHKERRQ(ierr);
854 
855   /* load up the local column values */
856   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
857   cnt  = 0;
858   for (i=0; i<mat->m; i++) {
859     for (j=B->i[i]; j<B->i[i+1]; j++) {
860       if ( garray[B->j[j]] > cstart) break;
861       column_values[cnt++] = B->a[j];
862     }
863     for (k=A->i[i]; k<A->i[i+1]; k++) {
864       column_values[cnt++] = A->a[k];
865     }
866     for (; j<B->i[i+1]; j++) {
867       column_values[cnt++] = B->a[j];
868     }
869   }
870   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
871 
872   /* store the column values to the file */
873   if (!rank) {
874     MPI_Status status;
875     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
876     for (i=1; i<size; i++) {
877       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
878       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
879       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
880       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
881     }
882   } else {
883     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
884     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
885   }
886   ierr = PetscFree(column_values);CHKERRQ(ierr);
887   PetscFunctionReturn(0);
888 }
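
/*
   Layout of the binary file produced above (everything is written by process 0;
   the other processes ship their pieces to it):

     header[4]      : MAT_FILE_COOKIE, M, N, global number of nonzeros
     row lengths    : one PetscInt per row, all rows in global order
     column indices : per row, off-diagonal columns below cstart, then the
                      diagonal-block columns shifted by cstart, then the
                      remaining off-diagonal columns (increasing global order)
     values         : the matching PetscScalar entries in the same order
*/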
889 
890 #undef __FUNCT__
891 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
892 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
893 {
894   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
895   PetscErrorCode    ierr;
896   PetscMPIInt       rank = aij->rank,size = aij->size;
897   PetscTruth        isdraw,iascii,flg,isbinary;
898   PetscViewer       sviewer;
899   PetscViewerFormat format;
900 
901   PetscFunctionBegin;
902   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
903   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
904   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
905   if (iascii) {
906     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
907     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
908       MatInfo info;
909       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
910       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
911       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
912       if (flg) {
913         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
914 					      rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
915       } else {
916         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
917 		    rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
918       }
919       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
920       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
921       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
922       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
923       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
924       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
925       PetscFunctionReturn(0);
926     } else if (format == PETSC_VIEWER_ASCII_INFO) {
927       PetscFunctionReturn(0);
928     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
929       PetscFunctionReturn(0);
930     }
931   } else if (isbinary) {
932     if (size == 1) {
933       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
934       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
935     } else {
936       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
937     }
938     PetscFunctionReturn(0);
939   } else if (isdraw) {
940     PetscDraw  draw;
941     PetscTruth isnull;
942     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
943     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
944   }
945 
946   if (size == 1) {
947     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
948     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
949   } else {
950     /* assemble the entire matrix onto first processor. */
951     Mat         A;
952     Mat_SeqAIJ  *Aloc;
953     PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
954     PetscScalar *a;
955 
956     if (!rank) {
957       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
958     } else {
959       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
960     }
961     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
962     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
963     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
964     PetscLogObjectParent(mat,A);
965 
966     /* copy over the A part */
967     Aloc = (Mat_SeqAIJ*)aij->A->data;
968     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
969     row = aij->rstart;
970     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
971     for (i=0; i<m; i++) {
972       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
973       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
974     }
975     aj = Aloc->j;
976     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
977 
978     /* copy over the B part */
979     Aloc = (Mat_SeqAIJ*)aij->B->data;
980     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
981     row  = aij->rstart;
982     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
983     ct   = cols;
984     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
985     for (i=0; i<m; i++) {
986       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
987       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
988     }
989     ierr = PetscFree(ct);CHKERRQ(ierr);
990     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
991     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
992     /*
993        Everyone has to call to draw the matrix since the graphics waits are
994        synchronized across all processors that share the PetscDraw object
995     */
996     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
997     if (!rank) {
998       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
999       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1000     }
1001     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1002     ierr = MatDestroy(A);CHKERRQ(ierr);
1003   }
1004   PetscFunctionReturn(0);
1005 }
1006 
1007 #undef __FUNCT__
1008 #define __FUNCT__ "MatView_MPIAIJ"
1009 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1010 {
1011   PetscErrorCode ierr;
1012   PetscTruth     iascii,isdraw,issocket,isbinary;
1013 
1014   PetscFunctionBegin;
1015   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1016   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1017   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1018   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1019   if (iascii || isdraw || isbinary || issocket) {
1020     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1021   } else {
1022     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1023   }
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 
1028 
1029 #undef __FUNCT__
1030 #define __FUNCT__ "MatRelax_MPIAIJ"
1031 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1032 {
1033   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1034   PetscErrorCode ierr;
1035   Vec            bb1;
1036   PetscScalar    mone=-1.0;
1037 
1038   PetscFunctionBegin;
1039   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1040 
1041   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1042 
1043   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1044     if (flag & SOR_ZERO_INITIAL_GUESS) {
1045       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1046       its--;
1047     }
1048 
1049     while (its--) {
1050       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1051       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1052 
1053       /* update rhs: bb1 = bb - B*x */
1054       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1055       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1056 
1057       /* local sweep */
1058       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1059       CHKERRQ(ierr);
1060     }
1061   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1062     if (flag & SOR_ZERO_INITIAL_GUESS) {
1063       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1064       its--;
1065     }
1066     while (its--) {
1067       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1068       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1069 
1070       /* update rhs: bb1 = bb - B*x */
1071       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1072       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1073 
1074       /* local sweep */
1075       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1076       CHKERRQ(ierr);
1077     }
1078   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1079     if (flag & SOR_ZERO_INITIAL_GUESS) {
1080       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1081       its--;
1082     }
1083     while (its--) {
1084       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1085       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1086 
1087       /* update rhs: bb1 = bb - B*x */
1088       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1089       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1090 
1091       /* local sweep */
1092       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1093       CHKERRQ(ierr);
1094     }
1095   } else {
1096     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1097   }
1098 
1099   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1100   PetscFunctionReturn(0);
1101 }
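
/*
   Note on the local SOR variants above: each outer iteration gathers the ghost
   values of xx into mat->lvec, negates them with VecScale(&mone,mat->lvec) and
   folds them into the right-hand side through B's multadd, giving

     bb1 = bb - B_off * x(ghost)

   so the sequential relaxation on the diagonal block sees a right-hand side with
   the off-process coupling already removed.  True parallel SOR (flags other than
   the SOR_LOCAL_* ones) is not supported.
*/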
1102 
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1105 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1106 {
1107   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1108   Mat            A = mat->A,B = mat->B;
1109   PetscErrorCode ierr;
1110   PetscReal      isend[5],irecv[5];
1111 
1112   PetscFunctionBegin;
1113   info->block_size     = 1.0;
1114   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1115   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1116   isend[3] = info->memory;  isend[4] = info->mallocs;
1117   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1118   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1119   isend[3] += info->memory;  isend[4] += info->mallocs;
1120   if (flag == MAT_LOCAL) {
1121     info->nz_used      = isend[0];
1122     info->nz_allocated = isend[1];
1123     info->nz_unneeded  = isend[2];
1124     info->memory       = isend[3];
1125     info->mallocs      = isend[4];
1126   } else if (flag == MAT_GLOBAL_MAX) {
1127     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1128     info->nz_used      = irecv[0];
1129     info->nz_allocated = irecv[1];
1130     info->nz_unneeded  = irecv[2];
1131     info->memory       = irecv[3];
1132     info->mallocs      = irecv[4];
1133   } else if (flag == MAT_GLOBAL_SUM) {
1134     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1135     info->nz_used      = irecv[0];
1136     info->nz_allocated = irecv[1];
1137     info->nz_unneeded  = irecv[2];
1138     info->memory       = irecv[3];
1139     info->mallocs      = irecv[4];
1140   }
1141   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1142   info->fill_ratio_needed = 0;
1143   info->factor_mallocs    = 0;
1144   info->rows_global       = (double)matin->M;
1145   info->columns_global    = (double)matin->N;
1146   info->rows_local        = (double)matin->m;
1147   info->columns_local     = (double)matin->N;
1148 
1149   PetscFunctionReturn(0);
1150 }
1151 
1152 #undef __FUNCT__
1153 #define __FUNCT__ "MatSetOption_MPIAIJ"
1154 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1155 {
1156   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1157   PetscErrorCode ierr;
1158 
1159   PetscFunctionBegin;
1160   switch (op) {
1161   case MAT_NO_NEW_NONZERO_LOCATIONS:
1162   case MAT_YES_NEW_NONZERO_LOCATIONS:
1163   case MAT_COLUMNS_UNSORTED:
1164   case MAT_COLUMNS_SORTED:
1165   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1166   case MAT_KEEP_ZEROED_ROWS:
1167   case MAT_NEW_NONZERO_LOCATION_ERR:
1168   case MAT_USE_INODES:
1169   case MAT_DO_NOT_USE_INODES:
1170   case MAT_IGNORE_ZERO_ENTRIES:
1171     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1172     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1173     break;
1174   case MAT_ROW_ORIENTED:
1175     a->roworiented = PETSC_TRUE;
1176     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1177     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1178     break;
1179   case MAT_ROWS_SORTED:
1180   case MAT_ROWS_UNSORTED:
1181   case MAT_YES_NEW_DIAGONALS:
1182     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1183     break;
1184   case MAT_COLUMN_ORIENTED:
1185     a->roworiented = PETSC_FALSE;
1186     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1187     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1188     break;
1189   case MAT_IGNORE_OFF_PROC_ENTRIES:
1190     a->donotstash = PETSC_TRUE;
1191     break;
1192   case MAT_NO_NEW_DIAGONALS:
1193     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1194   case MAT_SYMMETRIC:
1195   case MAT_STRUCTURALLY_SYMMETRIC:
1196   case MAT_HERMITIAN:
1197   case MAT_SYMMETRY_ETERNAL:
1198     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1199     break;
1200   case MAT_NOT_SYMMETRIC:
1201   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1202   case MAT_NOT_HERMITIAN:
1203   case MAT_NOT_SYMMETRY_ETERNAL:
1204     break;
1205   default:
1206     SETERRQ(PETSC_ERR_SUP,"unknown option");
1207   }
1208   PetscFunctionReturn(0);
1209 }
1210 
1211 #undef __FUNCT__
1212 #define __FUNCT__ "MatGetRow_MPIAIJ"
1213 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1214 {
1215   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1216   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1217   PetscErrorCode ierr;
1218   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1219   PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1220   PetscInt       *cmap,*idx_p;
1221 
1222   PetscFunctionBegin;
1223   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1224   mat->getrowactive = PETSC_TRUE;
1225 
1226   if (!mat->rowvalues && (idx || v)) {
1227     /*
1228         allocate enough space to hold information from the longest row.
1229     */
1230     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1231     PetscInt     max = 1,tmp;
1232     for (i=0; i<matin->m; i++) {
1233       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1234       if (max < tmp) { max = tmp; }
1235     }
1236     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1237     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1238   }
1239 
1240   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1241   lrow = row - rstart;
1242 
1243   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1244   if (!v)   {pvA = 0; pvB = 0;}
1245   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1246   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1247   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1248   nztot = nzA + nzB;
1249 
1250   cmap  = mat->garray;
1251   if (v  || idx) {
1252     if (nztot) {
1253       /* Sort by increasing column numbers, assuming A and B already sorted */
1254       PetscInt imark = -1;
1255       if (v) {
1256         *v = v_p = mat->rowvalues;
1257         for (i=0; i<nzB; i++) {
1258           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1259           else break;
1260         }
1261         imark = i;
1262         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1263         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1264       }
1265       if (idx) {
1266         *idx = idx_p = mat->rowindices;
1267         if (imark > -1) {
1268           for (i=0; i<imark; i++) {
1269             idx_p[i] = cmap[cworkB[i]];
1270           }
1271         } else {
1272           for (i=0; i<nzB; i++) {
1273             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1274             else break;
1275           }
1276           imark = i;
1277         }
1278         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1279         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1280       }
1281     } else {
1282       if (idx) *idx = 0;
1283       if (v)   *v   = 0;
1284     }
1285   }
1286   *nz = nztot;
1287   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1288   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1289   PetscFunctionReturn(0);
1290 }
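
/*
   Note: the row returned above merges the diagonal-block row and the
   off-diagonal-block row into globally increasing column order: B entries whose
   mapped column cmap[col] is below cstart come first, then the A entries shifted
   by cstart, then the remaining B entries.  The work arrays rowvalues/rowindices
   are sized once for the longest local row and freed in MatAssemblyEnd_MPIAIJ().
*/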
1291 
1292 #undef __FUNCT__
1293 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1294 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1295 {
1296   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1297 
1298   PetscFunctionBegin;
1299   if (aij->getrowactive == PETSC_FALSE) {
1300     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1301   }
1302   aij->getrowactive = PETSC_FALSE;
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 #undef __FUNCT__
1307 #define __FUNCT__ "MatNorm_MPIAIJ"
1308 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1309 {
1310   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1311   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1312   PetscErrorCode ierr;
1313   PetscInt       i,j,cstart = aij->cstart;
1314   PetscReal      sum = 0.0;
1315   PetscScalar    *v;
1316 
1317   PetscFunctionBegin;
1318   if (aij->size == 1) {
1319     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1320   } else {
1321     if (type == NORM_FROBENIUS) {
1322       v = amat->a;
1323       for (i=0; i<amat->nz; i++) {
1324 #if defined(PETSC_USE_COMPLEX)
1325         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1326 #else
1327         sum += (*v)*(*v); v++;
1328 #endif
1329       }
1330       v = bmat->a;
1331       for (i=0; i<bmat->nz; i++) {
1332 #if defined(PETSC_USE_COMPLEX)
1333         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1334 #else
1335         sum += (*v)*(*v); v++;
1336 #endif
1337       }
1338       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1339       *norm = sqrt(*norm);
1340     } else if (type == NORM_1) { /* max column norm */
1341       PetscReal *tmp,*tmp2;
1342       PetscInt    *jj,*garray = aij->garray;
1343       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1344       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1345       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1346       *norm = 0.0;
1347       v = amat->a; jj = amat->j;
1348       for (j=0; j<amat->nz; j++) {
1349         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1350       }
1351       v = bmat->a; jj = bmat->j;
1352       for (j=0; j<bmat->nz; j++) {
1353         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1354       }
1355       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1356       for (j=0; j<mat->N; j++) {
1357         if (tmp2[j] > *norm) *norm = tmp2[j];
1358       }
1359       ierr = PetscFree(tmp);CHKERRQ(ierr);
1360       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1361     } else if (type == NORM_INFINITY) { /* max row norm */
1362       PetscReal ntemp = 0.0;
1363       for (j=0; j<aij->A->m; j++) {
1364         v = amat->a + amat->i[j];
1365         sum = 0.0;
1366         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1367           sum += PetscAbsScalar(*v); v++;
1368         }
1369         v = bmat->a + bmat->i[j];
1370         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1371           sum += PetscAbsScalar(*v); v++;
1372         }
1373         if (sum > ntemp) ntemp = sum;
1374       }
1375       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1376     } else {
1377       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1378     }
1379   }
1380   PetscFunctionReturn(0);
1381 }
1382 
1383 #undef __FUNCT__
1384 #define __FUNCT__ "MatTranspose_MPIAIJ"
1385 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1386 {
1387   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1388   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1389   PetscErrorCode ierr;
1390   PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1391   Mat            B;
1392   PetscScalar    *array;
1393 
1394   PetscFunctionBegin;
1395   if (!matout && M != N) {
1396     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1397   }
1398 
1399   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1400   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1401   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1402 
1403   /* copy over the A part */
1404   Aloc = (Mat_SeqAIJ*)a->A->data;
1405   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1406   row = a->rstart;
1407   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1408   for (i=0; i<m; i++) {
1409     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1410     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1411   }
1412   aj = Aloc->j;
1413   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1414 
1415   /* copy over the B part */
1416   Aloc = (Mat_SeqAIJ*)a->B->data;
1417   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1418   row  = a->rstart;
1419   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1420   ct   = cols;
1421   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1422   for (i=0; i<m; i++) {
1423     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1424     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1425   }
1426   ierr = PetscFree(ct);CHKERRQ(ierr);
1427   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1428   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1429   if (matout) {
1430     *matout = B;
1431   } else {
1432     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1433   }
1434   PetscFunctionReturn(0);
1435 }
1436 
1437 #undef __FUNCT__
1438 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1439 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1440 {
1441   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1442   Mat            a = aij->A,b = aij->B;
1443   PetscErrorCode ierr;
1444   PetscInt       s1,s2,s3;
1445 
1446   PetscFunctionBegin;
1447   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1448   if (rr) {
1449     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1450     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1451     /* Overlap communication with computation. */
1452     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1453   }
1454   if (ll) {
1455     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1456     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1457     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1458   }
1459   /* scale  the diagonal block */
1460   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1461 
1462   if (rr) {
1463     /* Do a scatter end and then right scale the off-diagonal block */
1464     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1465     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1466   }
1467 
1468   PetscFunctionReturn(0);
1469 }
1470 
1471 
1472 #undef __FUNCT__
1473 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1474 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1475 {
1476   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1477   PetscErrorCode ierr;
1478 
1479   PetscFunctionBegin;
1480   if (!a->rank) {
1481     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1482   }
1483   PetscFunctionReturn(0);
1484 }
1485 
1486 #undef __FUNCT__
1487 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1488 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1489 {
1490   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1491   PetscErrorCode ierr;
1492 
1493   PetscFunctionBegin;
1494   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1495   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1496   PetscFunctionReturn(0);
1497 }
1498 #undef __FUNCT__
1499 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1500 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1501 {
1502   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1503   PetscErrorCode ierr;
1504 
1505   PetscFunctionBegin;
1506   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1507   PetscFunctionReturn(0);
1508 }
1509 
1510 #undef __FUNCT__
1511 #define __FUNCT__ "MatEqual_MPIAIJ"
1512 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1513 {
1514   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1515   Mat            a,b,c,d;
1516   PetscTruth     flg;
1517   PetscErrorCode ierr;
1518 
1519   PetscFunctionBegin;
1520   a = matA->A; b = matA->B;
1521   c = matB->A; d = matB->B;
1522 
1523   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1524   if (flg == PETSC_TRUE) {
1525     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1526   }
1527   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1528   PetscFunctionReturn(0);
1529 }
1530 
1531 #undef __FUNCT__
1532 #define __FUNCT__ "MatCopy_MPIAIJ"
1533 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1534 {
1535   PetscErrorCode ierr;
1536   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1537   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1538 
1539   PetscFunctionBegin;
1540   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1541   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1542     /* Because of the column compression in the off-processor part of the matrix a->B,
1543        the number of columns in a->B and b->B may differ, hence we cannot call
1544        MatCopy() directly on the two parts. If need be, a copy more efficient than
1545        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
1546        then copying the submatrices. */
1547     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1548   } else {
1549     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1550     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1551   }
1552   PetscFunctionReturn(0);
1553 }
1554 
1555 #undef __FUNCT__
1556 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1557 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1558 {
1559   PetscErrorCode ierr;
1560 
1561   PetscFunctionBegin;
1562   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1563   PetscFunctionReturn(0);
1564 }
1565 
1566 #include "petscblaslapack.h"
1567 #undef __FUNCT__
1568 #define __FUNCT__ "MatAXPY_MPIAIJ"
1569 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1570 {
1571   PetscErrorCode ierr;
1572   PetscInt       i;
1573   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1574   PetscBLASInt   bnz,one=1;
1575   Mat_SeqAIJ     *x,*y;
1576 
1577   PetscFunctionBegin;
1578   if (str == SAME_NONZERO_PATTERN) {
1579     x = (Mat_SeqAIJ *)xx->A->data;
1580     y = (Mat_SeqAIJ *)yy->A->data;
1581     bnz = (PetscBLASInt)x->nz;
1582     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1583     x = (Mat_SeqAIJ *)xx->B->data;
1584     y = (Mat_SeqAIJ *)yy->B->data;
1585     bnz = (PetscBLASInt)x->nz;
1586     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1587   } else if (str == SUBSET_NONZERO_PATTERN) {
1588     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1589 
1590     x = (Mat_SeqAIJ *)xx->B->data;
1591     y = (Mat_SeqAIJ *)yy->B->data;
1592     if (y->xtoy && y->XtoY != xx->B) {
1593       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1594       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1595     }
1596     if (!y->xtoy) { /* get xtoy */
1597       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1598       y->XtoY = xx->B;
1599     }
1600     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1601   } else {
1602     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1603   }
1604   PetscFunctionReturn(0);
1605 }
1606 
1607 /* -------------------------------------------------------------------*/
1608 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1609        MatGetRow_MPIAIJ,
1610        MatRestoreRow_MPIAIJ,
1611        MatMult_MPIAIJ,
1612 /* 4*/ MatMultAdd_MPIAIJ,
1613        MatMultTranspose_MPIAIJ,
1614        MatMultTransposeAdd_MPIAIJ,
1615        0,
1616        0,
1617        0,
1618 /*10*/ 0,
1619        0,
1620        0,
1621        MatRelax_MPIAIJ,
1622        MatTranspose_MPIAIJ,
1623 /*15*/ MatGetInfo_MPIAIJ,
1624        MatEqual_MPIAIJ,
1625        MatGetDiagonal_MPIAIJ,
1626        MatDiagonalScale_MPIAIJ,
1627        MatNorm_MPIAIJ,
1628 /*20*/ MatAssemblyBegin_MPIAIJ,
1629        MatAssemblyEnd_MPIAIJ,
1630        0,
1631        MatSetOption_MPIAIJ,
1632        MatZeroEntries_MPIAIJ,
1633 /*25*/ MatZeroRows_MPIAIJ,
1634        0,
1635        0,
1636        0,
1637        0,
1638 /*30*/ MatSetUpPreallocation_MPIAIJ,
1639        0,
1640        0,
1641        0,
1642        0,
1643 /*35*/ MatDuplicate_MPIAIJ,
1644        0,
1645        0,
1646        0,
1647        0,
1648 /*40*/ MatAXPY_MPIAIJ,
1649        MatGetSubMatrices_MPIAIJ,
1650        MatIncreaseOverlap_MPIAIJ,
1651        MatGetValues_MPIAIJ,
1652        MatCopy_MPIAIJ,
1653 /*45*/ MatPrintHelp_MPIAIJ,
1654        MatScale_MPIAIJ,
1655        0,
1656        0,
1657        0,
1658 /*50*/ MatSetBlockSize_MPIAIJ,
1659        0,
1660        0,
1661        0,
1662        0,
1663 /*55*/ MatFDColoringCreate_MPIAIJ,
1664        0,
1665        MatSetUnfactored_MPIAIJ,
1666        0,
1667        0,
1668 /*60*/ MatGetSubMatrix_MPIAIJ,
1669        MatDestroy_MPIAIJ,
1670        MatView_MPIAIJ,
1671        MatGetPetscMaps_Petsc,
1672        0,
1673 /*65*/ 0,
1674        0,
1675        0,
1676        0,
1677        0,
1678 /*70*/ 0,
1679        0,
1680        MatSetColoring_MPIAIJ,
1681        MatSetValuesAdic_MPIAIJ,
1682        MatSetValuesAdifor_MPIAIJ,
1683 /*75*/ 0,
1684        0,
1685        0,
1686        0,
1687        0,
1688 /*80*/ 0,
1689        0,
1690        0,
1691        0,
1692 /*84*/ MatLoad_MPIAIJ,
1693        0,
1694        0,
1695        0,
1696        0,
1697        0,
1698 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1699        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1700        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1701        MatPtAP_MPIAIJ_MPIAIJ,
1702        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1703 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
1704        0,
1705        0,
1706        0};
1707 
1708 /* ----------------------------------------------------------------------------------------*/
1709 
1710 EXTERN_C_BEGIN
1711 #undef __FUNCT__
1712 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1713 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
1714 {
1715   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1716   PetscErrorCode ierr;
1717 
1718   PetscFunctionBegin;
1719   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1720   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1721   PetscFunctionReturn(0);
1722 }
1723 EXTERN_C_END
1724 
1725 EXTERN_C_BEGIN
1726 #undef __FUNCT__
1727 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1728 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
1729 {
1730   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1731   PetscErrorCode ierr;
1732 
1733   PetscFunctionBegin;
1734   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1735   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1736   PetscFunctionReturn(0);
1737 }
1738 EXTERN_C_END
1739 
1740 #include "petscpc.h"
1741 EXTERN_C_BEGIN
1742 #undef __FUNCT__
1743 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1744 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1745 {
1746   Mat_MPIAIJ     *b;
1747   PetscErrorCode ierr;
1748   PetscInt       i;
1749 
1750   PetscFunctionBegin;
1751   B->preallocated = PETSC_TRUE;
1752   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1753   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1754   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1755   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1756   if (d_nnz) {
1757     for (i=0; i<B->m; i++) {
1758       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1759     }
1760   }
1761   if (o_nnz) {
1762     for (i=0; i<B->m; i++) {
1763       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1764     }
1765   }
1766   b = (Mat_MPIAIJ*)B->data;
1767   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1768   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1769 
1770   PetscFunctionReturn(0);
1771 }
1772 EXTERN_C_END
1773 
1774 /*MC
1775    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
1776 
1777    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
1778    and MATMPIAIJ otherwise.  As a result, for single process communicators,
1779    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
1780    for communicators controlling multiple processes.  It is recommended that you call both of
1781    the above preallocation routines for simplicity.
1782 
1783    Options Database Keys:
1784 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
1785 
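   Example usage (a minimal sketch only; comm, the local sizes m and n, the global sizes M and N,
   and the preallocation values d_nz and o_nz are assumed to be supplied by the caller):
.vb
      MatCreate(comm,m,n,M,N,&A);
      MatSetType(A,MATAIJ);
      MatSeqAIJSetPreallocation(A,d_nz,PETSC_NULL);
      MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);
.ve
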
1786   Level: beginner
1787 
1788 .seealso: MatCreateMPIAIJ(), MATSEQAIJ, MATMPIAIJ
1789 M*/
1790 
1791 EXTERN_C_BEGIN
1792 #undef __FUNCT__
1793 #define __FUNCT__ "MatCreate_AIJ"
1794 PetscErrorCode MatCreate_AIJ(Mat A)
1795 {
1796   PetscErrorCode ierr;
1797   PetscMPIInt    size;
1798 
1799   PetscFunctionBegin;
1800   ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr);
1801   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1802   if (size == 1) {
1803     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1804   } else {
1805     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1806   }
1807   PetscFunctionReturn(0);
1808 }
1809 EXTERN_C_END
1810 
1811 #undef __FUNCT__
1812 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1813 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1814 {
1815   Mat            mat;
1816   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1817   PetscErrorCode ierr;
1818 
1819   PetscFunctionBegin;
1820   *newmat       = 0;
1821   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1822   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1823   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1824   a    = (Mat_MPIAIJ*)mat->data;
1825 
1826   mat->factor       = matin->factor;
1827   mat->bs           = matin->bs;
1828   mat->assembled    = PETSC_TRUE;
1829   mat->insertmode   = NOT_SET_VALUES;
1830   mat->preallocated = PETSC_TRUE;
1831 
1832   a->rstart       = oldmat->rstart;
1833   a->rend         = oldmat->rend;
1834   a->cstart       = oldmat->cstart;
1835   a->cend         = oldmat->cend;
1836   a->size         = oldmat->size;
1837   a->rank         = oldmat->rank;
1838   a->donotstash   = oldmat->donotstash;
1839   a->roworiented  = oldmat->roworiented;
1840   a->rowindices   = 0;
1841   a->rowvalues    = 0;
1842   a->getrowactive = PETSC_FALSE;
1843 
1844   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1845   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1846   if (oldmat->colmap) {
1847 #if defined (PETSC_USE_CTABLE)
1848     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1849 #else
1850     ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1851     PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));
1852     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1853 #endif
1854   } else a->colmap = 0;
1855   if (oldmat->garray) {
1856     PetscInt len;
1857     len  = oldmat->B->n;
1858     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1859     PetscLogObjectMemory(mat,len*sizeof(PetscInt));
1860     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1861   } else a->garray = 0;
1862 
1863   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1864   PetscLogObjectParent(mat,a->lvec);
1865   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1866   PetscLogObjectParent(mat,a->Mvctx);
1867   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1868   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1869   PetscLogObjectParent(mat,a->A);
1870   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1871   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1872   PetscLogObjectParent(mat,a->B);
1873   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1874   *newmat = mat;
1875   PetscFunctionReturn(0);
1876 }
1877 
1878 #include "petscsys.h"
1879 
1880 #undef __FUNCT__
1881 #define __FUNCT__ "MatLoad_MPIAIJ"
1882 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1883 {
1884   Mat            A;
1885   PetscScalar    *vals,*svals;
1886   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1887   MPI_Status     status;
1888   PetscErrorCode ierr;
1889   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1890   PetscInt       i,nz,j,rstart,rend;
1891   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1892   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1893   PetscInt       cend,cstart,n,*rowners;
1894   int            fd;
1895 
1896   PetscFunctionBegin;
1897   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1898   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1899   if (!rank) {
1900     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1901     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1902     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1903   }
1904 
1905   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1906   M = header[1]; N = header[2];
1907   /* determine ownership of all rows */
1908   m    = M/size + ((M % size) > rank);
1909   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1910   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1911   rowners[0] = 0;
1912   for (i=2; i<=size; i++) {
1913     rowners[i] += rowners[i-1];
1914   }
1915   rstart = rowners[rank];
1916   rend   = rowners[rank+1];
1917 
1918   /* distribute row lengths to all processors */
1919   ierr    = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr);
1920   if (!rank) {
1921     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
1922     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1923     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1924     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1925     for (j=0; j<m; j++) {
1926       procsnz[0] += ourlens[j];
1927     }
1928     for (i=1; i<size; i++) {
1929       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
1930       /* calculate the number of nonzeros on each processor */
1931       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
1932         procsnz[i] += rowlengths[j];
1933       }
1934       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1935     }
1936     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1937   } else {
1938     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1939   }
1940 
1941   if (!rank) {
1942     /* determine max buffer needed and allocate it */
1943     maxnz = 0;
1944     for (i=0; i<size; i++) {
1945       maxnz = PetscMax(maxnz,procsnz[i]);
1946     }
1947     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1948 
1949     /* read in my part of the matrix column indices  */
1950     nz   = procsnz[0];
1951     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1952     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1953 
1954     /* read in everyone else's part and ship it off */
1955     for (i=1; i<size; i++) {
1956       nz   = procsnz[i];
1957       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1958       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1959     }
1960     ierr = PetscFree(cols);CHKERRQ(ierr);
1961   } else {
1962     /* determine buffer space needed for message */
1963     nz = 0;
1964     for (i=0; i<m; i++) {
1965       nz += ourlens[i];
1966     }
1967     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1968 
1969     /* receive message of column indices*/
1970     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1971     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1972     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1973   }
1974 
1975   /* determine column ownership if matrix is not square */
1976   if (N != M) {
1977     n      = N/size + ((N % size) > rank);
1978     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1979     cstart = cend - n;
1980   } else {
1981     cstart = rstart;
1982     cend   = rend;
1983     n      = cend - cstart;
1984   }
1985 
1986   /* loop over local rows, determining number of off diagonal entries */
1987   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1988   jj = 0;
1989   for (i=0; i<m; i++) {
1990     for (j=0; j<ourlens[i]; j++) {
1991       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1992       jj++;
1993     }
1994   }
1995 
1996   /* create our matrix */
1997   for (i=0; i<m; i++) {
1998     ourlens[i] -= offlens[i];
1999   }
2000   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2001   ierr = MatSetType(A,type);CHKERRQ(ierr);
2002   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2003 
2004   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2005   for (i=0; i<m; i++) {
2006     ourlens[i] += offlens[i];
2007   }
2008 
2009   if (!rank) {
2010     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2011 
2012     /* read in my part of the matrix numerical values  */
2013     nz   = procsnz[0];
2014     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2015 
2016     /* insert into matrix */
2017     jj      = rstart;
2018     smycols = mycols;
2019     svals   = vals;
2020     for (i=0; i<m; i++) {
2021       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2022       smycols += ourlens[i];
2023       svals   += ourlens[i];
2024       jj++;
2025     }
2026 
2027     /* read in other processors and ship out */
2028     for (i=1; i<size; i++) {
2029       nz   = procsnz[i];
2030       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2031       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2032     }
2033     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2034   } else {
2035     /* receive numeric values */
2036     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2037 
2038     /* receive message of values*/
2039     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2040     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2041     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2042 
2043     /* insert into matrix */
2044     jj      = rstart;
2045     smycols = mycols;
2046     svals   = vals;
2047     for (i=0; i<m; i++) {
2048       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2049       smycols += ourlens[i];
2050       svals   += ourlens[i];
2051       jj++;
2052     }
2053   }
2054   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2055   ierr = PetscFree(vals);CHKERRQ(ierr);
2056   ierr = PetscFree(mycols);CHKERRQ(ierr);
2057   ierr = PetscFree(rowners);CHKERRQ(ierr);
2058 
2059   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2060   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2061   *newmat = A;
2062   PetscFunctionReturn(0);
2063 }
2064 
2065 #undef __FUNCT__
2066 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2067 /*
2068     Not great since it makes two copies of the submatrix: first a SeqAIJ matrix
2069   locally, and then the end result by concatenating the local matrices.
2070   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2071 */
2072 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2073 {
2074   PetscErrorCode ierr;
2075   PetscMPIInt    rank,size;
2076   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2077   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2078   Mat            *local,M,Mreuse;
2079   PetscScalar    *vwork,*aa;
2080   MPI_Comm       comm = mat->comm;
2081   Mat_SeqAIJ     *aij;
2082 
2083 
2084   PetscFunctionBegin;
2085   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2086   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2087 
2088   if (call ==  MAT_REUSE_MATRIX) {
2089     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2090     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2091     local = &Mreuse;
2092     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2093   } else {
2094     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2095     Mreuse = *local;
2096     ierr   = PetscFree(local);CHKERRQ(ierr);
2097   }
2098 
2099   /*
2100       m - number of local rows
2101       n - number of columns (same on all processors)
2102       rstart - first row in new global matrix generated
2103   */
2104   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2105   if (call == MAT_INITIAL_MATRIX) {
2106     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2107     ii  = aij->i;
2108     jj  = aij->j;
2109 
2110     /*
2111         Determine the number of non-zeros in the diagonal and off-diagonal
2112         portions of the matrix in order to do correct preallocation
2113     */
2114 
2115     /* first get start and end of "diagonal" columns */
2116     if (csize == PETSC_DECIDE) {
2117       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2118         nlocal = m;
2119 	nlocal = m;
2120       } else {
2121         nlocal = n/size + ((n % size) > rank);
2122       }
2123     } else {
2124       nlocal = csize;
2125     }
2126     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2127     rstart = rend - nlocal;
2128     if (rank == size - 1 && rend != n) {
2129       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2130     }
2131 
2132     /* next, compute all the lengths */
2133     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2134     olens = dlens + m;
2135     for (i=0; i<m; i++) {
2136       jend = ii[i+1] - ii[i];
2137       olen = 0;
2138       dlen = 0;
2139       for (j=0; j<jend; j++) {
2140         if (*jj < rstart || *jj >= rend) olen++;
2141         else dlen++;
2142         jj++;
2143       }
2144       olens[i] = olen;
2145       dlens[i] = dlen;
2146     }
2147     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2148     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2149     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2150     ierr = PetscFree(dlens);CHKERRQ(ierr);
2151   } else {
2152     PetscInt ml,nl;
2153 
2154     M = *newmat;
2155     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2156     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2157     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2158     /*
2159          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2160        rather than the slower MatSetValues().
2161     */
2162     M->was_assembled = PETSC_TRUE;
2163     M->assembled     = PETSC_FALSE;
2164   }
2165   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2166   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2167   ii  = aij->i;
2168   jj  = aij->j;
2169   aa  = aij->a;
2170   for (i=0; i<m; i++) {
2171     row   = rstart + i;
2172     nz    = ii[i+1] - ii[i];
2173     cwork = jj;     jj += nz;
2174     vwork = aa;     aa += nz;
2175     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2176   }
2177 
2178   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2179   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2180   *newmat = M;
2181 
2182   /* save submatrix used in processor for next request */
2183   if (call ==  MAT_INITIAL_MATRIX) {
2184     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2185     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2186   }
2187 
2188   PetscFunctionReturn(0);
2189 }
2190 
2191 EXTERN_C_BEGIN
2192 #undef __FUNCT__
2193 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2194 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2195 {
2196   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2197   PetscInt       m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2198   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2199   const PetscInt *JJ;
2200   PetscScalar    *values;
2201   PetscErrorCode ierr;
2202 
2203   PetscFunctionBegin;
2204 #if defined(PETSC_OPT_g)
2205   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]);
2206 #endif
2207   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2208   o_nnz = d_nnz + m;
2209 
2210   for (i=0; i<m; i++) {
2211     nnz     = I[i+1]- I[i];
2212     JJ      = J + I[i];
2213     nnz_max = PetscMax(nnz_max,nnz);
2214 #if defined(PETSC_OPT_g)
2215     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz);
2216 #endif
2217     for (j=0; j<nnz; j++) {
2218       if (*JJ >= cstart) break;
2219       JJ++;
2220     }
2221     d = 0;
2222     for (; j<nnz; j++) {
2223       if (*JJ++ >= cend) break;
2224       d++;
2225     }
2226     d_nnz[i] = d;
2227     o_nnz[i] = nnz - d;
2228   }
2229   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2230   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2231 
2232   if (v) values = (PetscScalar*)v;
2233   else {
2234     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2235     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2236   }
2237 
2238   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2239   for (i=0; i<m; i++) {
2240     ii   = i + rstart;
2241     nnz  = I[i+1]- I[i];
2242     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr);
2243   }
2244   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2245   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2246   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2247 
2248   if (!v) {
2249     ierr = PetscFree(values);CHKERRQ(ierr);
2250   }
2251   PetscFunctionReturn(0);
2252 }
2253 EXTERN_C_END
2254 
2255 #undef __FUNCT__
2256 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2257 /*@C
2258    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2259    (the default parallel PETSc format).
2260 
2261    Collective on MPI_Comm
2262 
2263    Input Parameters:
2264 +  A - the matrix
2265 .  i - the indices into j for the start of each local row (starts with zero)
2266 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2267 -  v - optional values in the matrix
2268 
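   Example usage (a minimal sketch only; the CSR arrays i, j and v describing the locally owned
   rows are assumed to have been built by the caller):
.vb
      MatCreate(comm,m,n,M,N,&A);
      MatSetType(A,MATMPIAIJ);
      MatMPIAIJSetPreallocationCSR(A,i,j,v);
.ve
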
2269    Level: developer
2270 
2271 .keywords: matrix, aij, compressed row, sparse, parallel
2272 
2273 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2274 @*/
2275 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2276 {
2277   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2278 
2279   PetscFunctionBegin;
2280   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2281   if (f) {
2282     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2283   }
2284   PetscFunctionReturn(0);
2285 }
2286 
2287 #undef __FUNCT__
2288 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2289 /*@C
2290    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2291    (the default parallel PETSc format).  For good matrix assembly performance
2292    the user should preallocate the matrix storage by setting the parameters
2293    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2294    performance can be increased by more than a factor of 50.
2295 
2296    Collective on MPI_Comm
2297 
2298    Input Parameters:
2299 +  A - the matrix
2300 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2301            (same value is used for all local rows)
2302 .  d_nnz - array containing the number of nonzeros in the various rows of the
2303            DIAGONAL portion of the local submatrix (possibly different for each row)
2304            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2305            The size of this array is equal to the number of local rows, i.e. 'm'.
2306            You must leave room for the diagonal entry even if it is zero.
2307 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2308            submatrix (same value is used for all local rows).
2309 -  o_nnz - array containing the number of nonzeros in the various rows of the
2310            OFF-DIAGONAL portion of the local submatrix (possibly different for
2311            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2312            structure. The size of this array is equal to the number
2313            of local rows, i.e. 'm'.
2314 
2315    The AIJ format (also called the Yale sparse matrix format or
2316    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2317    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2318 
2319    The parallel matrix is partitioned such that the first m0 rows belong to
2320    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2321    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2322 
2323    The DIAGONAL portion of the local submatrix of a processor can be defined
2324    as the submatrix which is obtained by extracting the part corresponding
2325    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2326    first row that belongs to the processor, and r2 is the last row belonging
2327    to this processor. This is a square mxm matrix. The remaining portion
2328    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2329 
2330    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2331 
2332    Example usage:
2333 
2334    Consider the following 8x8 matrix with 34 non-zero values, that is
2335    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2336    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2337    as follows:
2338 
2339 .vb
2340             1  2  0  |  0  3  0  |  0  4
2341     Proc0   0  5  6  |  7  0  0  |  8  0
2342             9  0 10  | 11  0  0  | 12  0
2343     -------------------------------------
2344            13  0 14  | 15 16 17  |  0  0
2345     Proc1   0 18  0  | 19 20 21  |  0  0
2346             0  0  0  | 22 23  0  | 24  0
2347     -------------------------------------
2348     Proc2  25 26 27  |  0  0 28  | 29  0
2349            30  0  0  | 31 32 33  |  0 34
2350 .ve
2351 
2352    This can be represented as a collection of submatrices as:
2353 
2354 .vb
2355       A B C
2356       D E F
2357       G H I
2358 .ve
2359 
2360    Where the submatrices A,B,C are owned by proc0, D,E,F are
2361    owned by proc1, G,H,I are owned by proc2.
2362 
2363    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2364    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2365    The 'M','N' parameters are 8,8, and have the same values on all procs.
2366 
2367    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2368    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2369    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2370    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2371    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2372    matrix, and [DF] as another SeqAIJ matrix.
2373 
2374    When d_nz, o_nz parameters are specified, d_nz storage elements are
2375    allocated for every row of the local diagonal submatrix, and o_nz
2376    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2377    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2378    local row in the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
2379    In this case, the values of d_nz,o_nz are:
2380 .vb
2381      proc0 : dnz = 2, o_nz = 2
2382      proc1 : dnz = 3, o_nz = 2
2383      proc2 : dnz = 1, o_nz = 4
2384 .ve
2385    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2386    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2387    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2388    34 values.
2389 
2390    When d_nnz, o_nnz parameters are specified, the storage is specified
2391    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2392    In the above case the values for d_nnz,o_nnz are:
2393 .vb
2394      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2395      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2396      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2397 .ve
2398    Here the space allocated is the sum of all the above values, i.e. 34, and
2399    hence the preallocation is perfect.
2400 
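   For instance, with the d_nnz/o_nnz values above, proc0 could preallocate with the following
   sketch (the numerical values are still set later with MatSetValues()):
.vb
      PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
      MatCreate(comm,3,3,8,8,&A);
      MatSetType(A,MATMPIAIJ);
      MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
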
2401    Level: intermediate
2402 
2403 .keywords: matrix, aij, compressed row, sparse, parallel
2404 
2405 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2406           MPIAIJ
2407 @*/
2408 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2409 {
2410   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2411 
2412   PetscFunctionBegin;
2413   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2414   if (f) {
2415     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2416   }
2417   PetscFunctionReturn(0);
2418 }
2419 
2420 #undef __FUNCT__
2421 #define __FUNCT__ "MatCreateMPIAIJ"
2422 /*@C
2423    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2424    (the default parallel PETSc format).  For good matrix assembly performance
2425    the user should preallocate the matrix storage by setting the parameters
2426    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2427    performance can be increased by more than a factor of 50.
2428 
2429    Collective on MPI_Comm
2430 
2431    Input Parameters:
2432 +  comm - MPI communicator
2433 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
2434            This value should be the same as the local size used in creating the
2435            y vector for the matrix-vector product y = Ax.
2436 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
2437        This value should be the same as the local size used in creating the
2438        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
2439 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2440 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2441 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2442            (same value is used for all local rows)
2443 .  d_nnz - array containing the number of nonzeros in the various rows of the
2444            DIAGONAL portion of the local submatrix (possibly different for each row)
2445            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2446            The size of this array is equal to the number of local rows, i.e. 'm'.
2447            You must leave room for the diagonal entry even if it is zero.
2448 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2449            submatrix (same value is used for all local rows).
2450 -  o_nnz - array containing the number of nonzeros in the various rows of the
2451            OFF-DIAGONAL portion of the local submatrix (possibly different for
2452            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2453            structure. The size of this array is equal to the number
2454            of local rows, i.e. 'm'.
2455 
2456    Output Parameter:
2457 .  A - the matrix
2458 
2459    Notes:
2460    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2461    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2462    storage requirements for this matrix.
2463 
2464    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2465    processor then it must be used on all processors that share the object for
2466    that argument.
2467 
2468    The user MUST specify either the local or global matrix dimensions
2469    (possibly both).
2470 
2471    The parallel matrix is partitioned such that the first m0 rows belong to
2472    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2473    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2474 
2475    The DIAGONAL portion of the local submatrix of a processor can be defined
2476    as the submatrix which is obtained by extracting the part corresponding
2477    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2478    first row that belongs to the processor, and r2 is the last row belonging
2479    to this processor. This is a square mxm matrix. The remaining portion
2480    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2481 
2482    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2483 
2484    When calling this routine with a single process communicator, a matrix of
2485    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2486    type of communicator, use the construction mechanism:
2487      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2488 
2489    By default, this format uses inodes (identical nodes) when possible.
2490    We search for consecutive rows with the same nonzero structure, thereby
2491    reusing matrix information to achieve increased efficiency.
2492 
2493    Options Database Keys:
2494 +  -mat_aij_no_inode  - Do not use inodes
2495 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2496 -  -mat_aij_oneindex - Internally use indexing starting at 1
2497         rather than 0.  Note that when calling MatSetValues(),
2498         the user still MUST index entries starting at 0!
2499 
2500 
2501    Example usage:
2502 
2503    Consider the following 8x8 matrix with 34 non-zero values, that is
2504    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2505    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2506    as follows:
2507 
2508 .vb
2509             1  2  0  |  0  3  0  |  0  4
2510     Proc0   0  5  6  |  7  0  0  |  8  0
2511             9  0 10  | 11  0  0  | 12  0
2512     -------------------------------------
2513            13  0 14  | 15 16 17  |  0  0
2514     Proc1   0 18  0  | 19 20 21  |  0  0
2515             0  0  0  | 22 23  0  | 24  0
2516     -------------------------------------
2517     Proc2  25 26 27  |  0  0 28  | 29  0
2518            30  0  0  | 31 32 33  |  0 34
2519 .ve
2520 
2521    This can be represented as a collection of submatrices as:
2522 
2523 .vb
2524       A B C
2525       D E F
2526       G H I
2527 .ve
2528 
2529    Where the submatrices A,B,C are owned by proc0, D,E,F are
2530    owned by proc1, G,H,I are owned by proc2.
2531 
2532    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2533    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2534    The 'M','N' parameters are 8,8, and have the same values on all procs.
2535 
2536    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2537    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2538    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2539    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2540    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2541    matrix, and [DF] as another SeqAIJ matrix.
2542 
2543    When d_nz, o_nz parameters are specified, d_nz storage elements are
2544    allocated for every row of the local diagonal submatrix, and o_nz
2545    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2546    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2547    local row in the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
2548    In this case, the values of d_nz,o_nz are:
2549 .vb
2550      proc0 : dnz = 2, o_nz = 2
2551      proc1 : dnz = 3, o_nz = 2
2552      proc2 : dnz = 1, o_nz = 4
2553 .ve
2554    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2555    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2556    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2557    34 values.
2558 
2559    When d_nnz, o_nnz parameters are specified, the storage is specified
2560    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2561    In the above case the values for d_nnz,o_nnz are:
2562 .vb
2563      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2564      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2565      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2566 .ve
2567    Here the space allocated is the sum of all the above values, i.e. 34, and
2568    hence the preallocation is perfect.
2569 
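   For instance, with the d_nnz/o_nnz values above, proc0 could create its part of the matrix
   with the following sketch (the other processes make the corresponding calls with their own sizes):
.vb
      PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
      MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
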
2570    Level: intermediate
2571 
2572 .keywords: matrix, aij, compressed row, sparse, parallel
2573 
2574 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2575           MPIAIJ
2576 @*/
2577 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2578 {
2579   PetscErrorCode ierr;
2580   PetscMPIInt    size;
2581 
2582   PetscFunctionBegin;
2583   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2584   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2585   if (size > 1) {
2586     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2587     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2588   } else {
2589     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2590     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2591   }
2592   PetscFunctionReturn(0);
2593 }
2594 
2595 #undef __FUNCT__
2596 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
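/*
    MatMPIAIJGetSeqAIJ - gives access to the local pieces of an MPIAIJ matrix: Ad is the
  "diagonal" SeqAIJ block, Ao is the "off-diagonal" SeqAIJ block (stored with compressed
  columns), and colmap maps the columns of Ao to global column numbers.

    A minimal usage sketch (the variable names are illustrative only):

      Mat      Ad,Ao;
      PetscInt *colmap;
      MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
*/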
2597 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2598 {
2599   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2600 
2601   PetscFunctionBegin;
2602   *Ad     = a->A;
2603   *Ao     = a->B;
2604   *colmap = a->garray;
2605   PetscFunctionReturn(0);
2606 }
2607 
2608 #undef __FUNCT__
2609 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2610 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2611 {
2612   PetscErrorCode ierr;
2613   PetscInt       i;
2614   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2615 
2616   PetscFunctionBegin;
2617   if (coloring->ctype == IS_COLORING_LOCAL) {
2618     ISColoringValue *allcolors,*colors;
2619     ISColoring      ocoloring;
2620 
2621     /* set coloring for diagonal portion */
2622     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2623 
2624     /* set coloring for off-diagonal portion */
2625     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2626     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2627     for (i=0; i<a->B->n; i++) {
2628       colors[i] = allcolors[a->garray[i]];
2629     }
2630     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2631     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2632     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2633     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2634   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2635     ISColoringValue *colors;
2636     PetscInt        *larray;
2637     ISColoring      ocoloring;
2638 
2639     /* set coloring for diagonal portion */
2640     ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2641     for (i=0; i<a->A->n; i++) {
2642       larray[i] = i + a->cstart;
2643     }
2644     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2645     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2646     for (i=0; i<a->A->n; i++) {
2647       colors[i] = coloring->colors[larray[i]];
2648     }
2649     ierr = PetscFree(larray);CHKERRQ(ierr);
2650     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2651     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2652     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2653 
2654     /* set coloring for off-diagonal portion */
2655     ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2656     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2657     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2658     for (i=0; i<a->B->n; i++) {
2659       colors[i] = coloring->colors[larray[i]];
2660     }
2661     ierr = PetscFree(larray);CHKERRQ(ierr);
2662     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2663     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2664     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2665   } else {
2666     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2667   }
2668 
2669   PetscFunctionReturn(0);
2670 }
2671 
2672 #undef __FUNCT__
2673 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2674 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2675 {
2676   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2677   PetscErrorCode ierr;
2678 
2679   PetscFunctionBegin;
2680   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2681   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2682   PetscFunctionReturn(0);
2683 }
2684 
2685 #undef __FUNCT__
2686 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2687 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2688 {
2689   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2690   PetscErrorCode ierr;
2691 
2692   PetscFunctionBegin;
2693   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2694   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2695   PetscFunctionReturn(0);
2696 }
2697 
2698 #undef __FUNCT__
2699 #define __FUNCT__ "MatMerge"
2700 /*@C
2701       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2702                  matrices from each processor
2703 
2704     Collective on MPI_Comm
2705 
2706    Input Parameters:
2707 +    comm - the communicator the parallel matrix will live on
2708 .    inmat - the input sequential matrix on each processor
2709 .    n - number of local columns (or PETSC_DECIDE)
2710 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2711 
2712    Output Parameter:
2713 .    outmat - the parallel matrix generated
2714 
2715     Level: advanced
2716 
2717    Notes: The number of columns of the matrix on EACH processor MUST be the same.
2718 
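   Example usage (a minimal sketch only; localmat is a sequential matrix already assembled on
   each process, and is destroyed by this routine):
.vb
      MatMerge(comm,localmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);
.ve
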
2719 @*/
2720 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2721 {
2722   PetscErrorCode    ierr;
2723   PetscInt          m,N,i,rstart,nnz,I,*dnz,*onz;
2724   const PetscInt    *indx;
2725   const PetscScalar *values;
2726   PetscMap          columnmap,rowmap;
2727 
2728   PetscFunctionBegin;
2729   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2730   /*
2731   PetscMPIInt       rank;
2732   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2733   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N);
2734   */
2735   if (scall == MAT_INITIAL_MATRIX){
2736     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2737     if (n == PETSC_DECIDE){
2738       ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2739       ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
2740       ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2741       ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2742       ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2743     }
2744 
2745     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2746     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2747     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2748     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2749     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2750 
2751     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2752     for (i=0;i<m;i++) {
2753       ierr = MatGetRow(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2754       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2755       ierr = MatRestoreRow(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2756     }
2757     /* This routine will ONLY return MPIAIJ type matrix */
2758     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2759     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2760     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2761     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2762 
2763   } else if (scall == MAT_REUSE_MATRIX){
2764     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2765   } else {
2766     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2767   }
2768 
2769   for (i=0;i<m;i++) {
2770     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2771     I    = i + rstart;
2772     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2773     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2774   }
2775   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2776   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2777   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2778 
2779   PetscFunctionReturn(0);
2780 }
2781 
2782 #undef __FUNCT__
2783 #define __FUNCT__ "MatFileSplit"
2784 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2785 {
2786   PetscErrorCode    ierr;
2787   PetscMPIInt       rank;
2788   PetscInt          m,N,i,rstart,nnz;
2789   size_t            len;
2790   const PetscInt    *indx;
2791   PetscViewer       out;
2792   char              *name;
2793   Mat               B;
2794   const PetscScalar *values;
2795 
2796   PetscFunctionBegin;
2797   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2798   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2799   /* Should this be the type of the diagonal block of A? */
2800   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2801   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2802   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2803   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2804   for (i=0;i<m;i++) {
2805     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2806     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2807     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2808   }
2809   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2810   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2811 
2812   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2813   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2814   ierr = PetscMalloc((len+16)*sizeof(char),&name);CHKERRQ(ierr); /* room for the ".<rank>" suffix */
2815   sprintf(name,"%s.%d",outfile,rank);
2816   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2817   ierr = PetscFree(name);CHKERRQ(ierr);
2818   ierr = MatView(B,out);CHKERRQ(ierr);
2819   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2820   ierr = MatDestroy(B);CHKERRQ(ierr);
2821   PetscFunctionReturn(0);
2822 }
2823 
2824 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2825 #undef __FUNCT__
2826 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2827 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2828 {
2829   PetscErrorCode       ierr;
2830   Mat_Merge_SeqsToMPI  *merge;
2831   PetscObjectContainer container;
2832 
2833   PetscFunctionBegin;
2834   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2835   if (container) {
2836     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2837     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2838     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2839     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2840     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2841     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2842     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2843     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2844     ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2845     if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);}
2846     if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);}
2847     if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);}
2848 
2849     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2850     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2851     ierr = PetscFree(merge);CHKERRQ(ierr); /* merge is only valid when the container exists */
2852   }
2853 
2854   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2855   PetscFunctionReturn(0);
2856 }
2857 
2858 #include "src/mat/utils/freespace.h"
2859 #include "petscbt.h"
2860 #undef __FUNCT__
2861 #define __FUNCT__ "MatMerge_SeqsToMPI"
2862 /*@C
2863       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
2864                  matrices contributed by each processor
2865 
2866     Collective on MPI_Comm
2867 
2868    Input Parameters:
2869 +    comm - the communicator the parallel matrix will live on
2870 .    seqmat - the input sequential matrix (one per process)
2871 .    m - number of local rows (or PETSC_DECIDE)
2872 .    n - number of local columns (or PETSC_DECIDE)
2873 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2874 
2875    Output Parameter:
2876 .    mpimat - the parallel matrix generated
2877 
2878     Level: advanced
2879 
2880    Notes:
2881      The dimensions of the sequential matrix on each processor MUST be the same.
2882      The input seqmat is stored in the container "Mat_Merge_SeqsToMPI" and is
2883      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
2884 @*/
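/*
   A minimal usage sketch (not part of the library source; assumes every process holds a
   SeqAIJ matrix seqmat of identical global dimensions whose entries are to be summed):

      Mat seqmat,mpimat;
      ...
      ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,
                                MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
      ...  repeated merges with the same nonzero pattern may pass MAT_REUSE_MATRIX  ...
      ierr = MatDestroy(mpimat);CHKERRQ(ierr);
*/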
2885 static PetscEvent logkey_seqstompinum = 0;
2886 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2887 {
2888   PetscErrorCode       ierr;
2889   MPI_Comm             comm=mpimat->comm;
2890   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2891   PetscMPIInt          size,rank,taga,*len_s;
2892   PetscInt             N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
2893   PetscInt             proc,m;
2894   PetscInt             **buf_ri,**buf_rj;
2895   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2896   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2897   MPI_Request          *s_waits,*r_waits;
2898   MPI_Status           *status;
2899   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2900   Mat_Merge_SeqsToMPI  *merge;
2901   PetscObjectContainer container;
2902 
2903   PetscFunctionBegin;
2904   if (!logkey_seqstompinum) {
2905     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr);
2906   }
2907   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2908 
2909   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2910   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2911 
2912   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2913   if (container) {
2914     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2915   }
2916   bi     = merge->bi;
2917   bj     = merge->bj;
2918   buf_ri = merge->buf_ri;
2919   buf_rj = merge->buf_rj;
2920 
2921   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2922   ierr   = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2923   len_s  = merge->len_s;
2924 
2925   /* send and recv matrix values */
2926   /*-----------------------------*/
2927   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
2928   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
2929 
2930   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2931   for (proc=0,k=0; proc<size; proc++){
2932     if (!len_s[proc]) continue;
2933     i = owners[proc];
2934     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
2935     k++;
2936   }
2937 
2938   ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
2939   ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
2940   ierr = PetscFree(status);CHKERRQ(ierr);
2941 
2942   ierr = PetscFree(s_waits);CHKERRQ(ierr);
2943   ierr = PetscFree(r_waits);CHKERRQ(ierr);
2944 
2945   /* insert mat values of mpimat */
2946   /*----------------------------*/
2947   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
2948   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
2949   nextrow = buf_ri_k + merge->nrecv;
2950   nextai  = nextrow + merge->nrecv;
2951 
2952   for (k=0; k<merge->nrecv; k++){
2953     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
2954     nrows = *(buf_ri_k[k]);
2955     nextrow[k]  = buf_ri_k[k]+1;  /* next row number in the k-th received i-structure */
2956     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the i-structure offsets of the k-th received message */
2957   }
2958 
2959   /* set values of ba */
2960   ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
2961   for (i=0; i<m; i++) {
2962     arow = owners[rank] + i;
2963     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
2964     bnzi = bi[i+1] - bi[i];
2965     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
2966 
2967     /* add local non-zero vals of this proc's seqmat into ba */
2968     anzi = ai[arow+1] - ai[arow];
2969     aj   = a->j + ai[arow];
2970     aa   = a->a + ai[arow];
2971     nextaj = 0;
2972     for (j=0; nextaj<anzi; j++){
2973       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2974         ba_i[j] += aa[nextaj++];
2975       }
2976     }
2977 
2978     /* add received vals into ba */
2979     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
2980       /* i-th row */
2981       if (i == *nextrow[k]) {
2982         anzi = *(nextai[k]+1) - *nextai[k];
2983         aj   = buf_rj[k] + *(nextai[k]);
2984         aa   = abuf_r[k] + *(nextai[k]);
2985         nextaj = 0;
2986         for (j=0; nextaj<anzi; j++){
2987           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2988             ba_i[j] += aa[nextaj++];
2989           }
2990         }
2991         nextrow[k]++; nextai[k]++;
2992       }
2993     }
2994     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
2995   }
2996   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2997   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2998 
2999   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3000   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3001   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3002   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3003   PetscFunctionReturn(0);
3004 }
3005 static PetscEvent logkey_seqstompisym = 0;
3006 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3007 {
3008   PetscErrorCode       ierr;
3009   Mat                  B_mpi;
3010   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3011   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3012   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3013   PetscInt             M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
3014   PetscInt             len,proc,*dnz,*onz;
3015   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3016   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3017   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3018   MPI_Status           *status;
3019   FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
3020   PetscBT              lnkbt;
3021   Mat_Merge_SeqsToMPI  *merge;
3022   PetscObjectContainer container;
3023 
3024   PetscFunctionBegin;
3025   if (!logkey_seqstompisym) {
3026     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr);
3027   }
3028   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3029 
3030   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3031   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3032 
3033   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3034   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3035 
3036   /* determine row ownership */
3037   /*---------------------------------------------------------*/
3038   ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
3039   if (m == PETSC_DECIDE) {
3040     ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
3041   } else {
3042     ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
3043   }
3044   ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
3045   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3046   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3047 
3048   if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); }
3049   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
3050 
3051   /* determine the number of messages to send, their lengths */
3052   /*---------------------------------------------------------*/
3053   len_s  = merge->len_s;
3054 
3055   len = 0;  /* length of buf_si[] */
3056   merge->nsend = 0;
3057   for (proc=0; proc<size; proc++){
3058     len_si[proc] = 0;
3059     if (proc == rank){
3060       len_s[proc] = 0;
3061     } else {
3062       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3063       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3064     }
3065     if (len_s[proc]) {
3066       merge->nsend++;
3067       nrows = 0;
3068       for (i=owners[proc]; i<owners[proc+1]; i++){
3069         if (ai[i+1] > ai[i]) nrows++;
3070       }
3071       len_si[proc] = 2*(nrows+1);
3072       len += len_si[proc];
3073     }
3074   }
3075 
3076   /* determine the number and length of messages to receive for ij-structure */
3077   /*-------------------------------------------------------------------------*/
3078   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3079   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3080 
3081   /* post the Irecv of j-structure */
3082   /*-------------------------------*/
3083   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
3084   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3085 
3086   /* post the Isend of j-structure */
3087   /*--------------------------------*/
3088   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3089   sj_waits = si_waits + merge->nsend;
3090 
3091   for (proc=0, k=0; proc<size; proc++){
3092     if (!len_s[proc]) continue;
3093     i = owners[proc];
3094     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3095     k++;
3096   }
3097 
3098   /* receives and sends of j-structure are complete */
3099   /*------------------------------------------------*/
3100   ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
3101   ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);
3102 
3103   /* send and recv i-structure */
3104   /*---------------------------*/
3105   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
3106   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3107 
3108   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3109   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3110   for (proc=0,k=0; proc<size; proc++){
3111     if (!len_s[proc]) continue;
3112     /* form outgoing message for i-structure:
3113          buf_si[0]:                 nrows to be sent
3114                [1:nrows]:           row index (local to the destination processor)
3115                [nrows+1:2*nrows+1]: i-structure index
3116     */
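    /*
       Illustration (an assumed example, not values computed by this code): if local rows 3 and 7
       of the destination's range are nonempty with 2 and 5 nonzeros respectively, the message is
           buf_si = { 2, 3, 7, 0, 2, 7 }
       i.e. nrows=2, then the two local row indices, then the running i-structure offsets.
    */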
3117     /*-------------------------------------------*/
3118     nrows = len_si[proc]/2 - 1;
3119     buf_si_i    = buf_si + nrows+1;
3120     buf_si[0]   = nrows;
3121     buf_si_i[0] = 0;
3122     nrows = 0;
3123     for (i=owners[proc]; i<owners[proc+1]; i++){
3124       anzi = ai[i+1] - ai[i];
3125       if (anzi) {
3126         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3127         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3128         nrows++;
3129       }
3130     }
3131     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3132     k++;
3133     buf_si += len_si[proc];
3134   }
3135 
3136   ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
3137   ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);
3138 
3139   ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3140   for (i=0; i<merge->nrecv; i++){
3141     ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI:   recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3142   }
3143 
3144   ierr = PetscFree(len_si);CHKERRQ(ierr);
3145   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3146   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3147   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3148   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3149   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3150   ierr = PetscFree(status);CHKERRQ(ierr);
3151 
3152   /* compute a local seq matrix in each processor */
3153   /*----------------------------------------------*/
3154   /* allocate bi array and free space for accumulating nonzero column info */
3155   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3156   bi[0] = 0;
3157 
3158   /* create and initialize a linked list */
3159   nlnk = N+1;
3160   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3161 
3162   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3163   len = 0;
3164   len  = ai[owners[rank+1]] - ai[owners[rank]];
3165   ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3166   current_space = free_space;
3167 
3168   /* determine symbolic info for each local row */
3169   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3170   nextrow = buf_ri_k + merge->nrecv;
3171   nextai  = nextrow + merge->nrecv;
3172   for (k=0; k<merge->nrecv; k++){
3173     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3174     nrows = *buf_ri_k[k];
3175     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number in the k-th received i-structure */
3176     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the i-structure offsets of the k-th received message */
3177   }
3178 
3179   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3180   len = 0;
3181   for (i=0;i<m;i++) {
3182     bnzi   = 0;
3183     /* add local non-zero cols of this proc's seqmat into lnk */
3184     arow   = owners[rank] + i;
3185     anzi   = ai[arow+1] - ai[arow];
3186     aj     = a->j + ai[arow];
3187     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3188     bnzi += nlnk;
3189     /* add received col data into lnk */
3190     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3191       if (i == *nextrow[k]) { /* i-th row */
3192         anzi = *(nextai[k]+1) - *nextai[k];
3193         aj   = buf_rj[k] + *nextai[k];
3194         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3195         bnzi += nlnk;
3196         nextrow[k]++; nextai[k]++;
3197       }
3198     }
3199     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3200 
3201     /* if free space is not available, make more free space */
3202     if (current_space->local_remaining<bnzi) {
3203       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3204       nspacedouble++;
3205     }
3206     /* copy data into free space, then initialize lnk */
3207     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3208     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3209 
3210     current_space->array           += bnzi;
3211     current_space->local_used      += bnzi;
3212     current_space->local_remaining -= bnzi;
3213 
3214     bi[i+1] = bi[i] + bnzi;
3215   }
3216 
3217   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3218 
3219   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3220   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3221   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3222 
3223   /* create symbolic parallel matrix B_mpi */
3224   /*---------------------------------------*/
3225   if (n==PETSC_DECIDE) {
3226     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr);
3227   } else {
3228     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
3229   }
3230   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3231   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3232   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3233 
3234   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3235   B_mpi->assembled     = PETSC_FALSE;
3236   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3237   merge->bi            = bi;
3238   merge->bj            = bj;
3239   merge->buf_ri        = buf_ri;
3240   merge->buf_rj        = buf_rj;
3241   merge->coi           = PETSC_NULL;
3242   merge->coj           = PETSC_NULL;
3243   merge->owners_co     = PETSC_NULL;
3244 
3245   /* attach the supporting struct to B_mpi for reuse */
3246   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3247   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3248   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3249   *mpimat = B_mpi;
3250   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3251   PetscFunctionReturn(0);
3252 }
3253 
3254 static PetscEvent logkey_seqstompi = 0;
3255 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3256 {
3257   PetscErrorCode   ierr;
3258 
3259   PetscFunctionBegin;
3260   if (!logkey_seqstompi) {
3261     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
3262   }
3263   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3264   if (scall == MAT_INITIAL_MATRIX){
3265     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3266   }
3267   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3268   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3269   PetscFunctionReturn(0);
3270 }
3271 static PetscEvent logkey_getlocalmat = 0;
3272 #undef __FUNCT__
3273 #define __FUNCT__ "MatGetLocalMat"
3274 /*@C
3275      MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of the parallel matrix A
3276 
3277     Not Collective
3278 
3279    Input Parameters:
3280 +    A - the matrix
3281 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3282 
3283    Output Parameter:
3284 .    A_loc - the local sequential matrix generated
3285 
3286     Level: developer
3287 
3288 @*/
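/*
   A minimal usage sketch (not part of the library source; A is assumed to be an
   assembled MPIAIJ matrix):

      Mat A_loc;
      ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      ...  A_loc holds A's local rows with global column numbering  ...
      ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);   after A's values change
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/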
3289 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3290 {
3291   PetscErrorCode  ierr;
3292   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3293   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3294   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3295   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3296   PetscInt        am=A->m,i,j,k,cstart=mpimat->cstart;
3297   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3298 
3299   PetscFunctionBegin;
3300   if (!logkey_getlocalmat) {
3301     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
3302   }
3303   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3304   if (scall == MAT_INITIAL_MATRIX){
3305     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3306     ci[0] = 0;
3307     for (i=0; i<am; i++){
3308       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3309     }
3310     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3311     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3312     k = 0;
3313     for (i=0; i<am; i++) {
3314       ncols_o = bi[i+1] - bi[i];
3315       ncols_d = ai[i+1] - ai[i];
3316       /* off-diagonal portion of A */
3317       for (jo=0; jo<ncols_o; jo++) {
3318         col = cmap[*bj];
3319         if (col >= cstart) break;
3320         cj[k]   = col; bj++;
3321         ca[k++] = *ba++;
3322       }
3323       /* diagonal portion of A */
3324       for (j=0; j<ncols_d; j++) {
3325         cj[k]   = cstart + *aj++;
3326         ca[k++] = *aa++;
3327       }
3328       /* off-diagonal portion of A */
3329       for (j=jo; j<ncols_o; j++) {
3330         cj[k]   = cmap[*bj++];
3331         ca[k++] = *ba++;
3332       }
3333     }
3334     /* put together the new matrix */
3335     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3336     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3337     /* Since these are PETSc arrays, change flags to free them as necessary. */
3338     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3339     mat->freedata = PETSC_TRUE;
3340     mat->nonew    = 0;
3341   } else if (scall == MAT_REUSE_MATRIX){
3342     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3343     ci = mat->i; cj = mat->j; ca = mat->a;
3344     for (i=0; i<am; i++) {
3345       /* off-diagonal portion of A */
3346       ncols_o = bi[i+1] - bi[i];
3347       for (jo=0; jo<ncols_o; jo++) {
3348         col = cmap[*bj];
3349         if (col >= cstart) break;
3350         *ca++ = *ba++; bj++;
3351       }
3352       /* diagonal portion of A */
3353       ncols_d = ai[i+1] - ai[i];
3354       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3355       /* off-diagonal portion of A */
3356       for (j=jo; j<ncols_o; j++) {
3357         *ca++ = *ba++; bj++;
3358       }
3359     }
3360   } else {
3361     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3362   }
3363 
3364   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3365   PetscFunctionReturn(0);
3366 }
3367 
3368 static PetscEvent logkey_getlocalmatcondensed = 0;
3369 #undef __FUNCT__
3370 #define __FUNCT__ "MatGetLocalMatCondensed"
3371 /*@C
3372      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all of A's local rows and NON-ZERO columns
3373 
3374     Not Collective
3375 
3376    Input Parameters:
3377 +    A - the matrix
3378 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3379 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3380 
3381    Output Parameter:
3382 .    A_loc - the local sequential matrix generated
3383 
3384     Level: developer
3385 
3386 @*/
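/*
   A minimal usage sketch (not part of the library source; A is assumed to be an assembled
   MPIAIJ matrix, and PETSC_NULL for row and col lets the routine pick them itself):

      Mat A_loc;
      ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
      ...
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/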
3387 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3388 {
3389   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3390   PetscErrorCode    ierr;
3391   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3392   IS                isrowa,iscola;
3393   Mat               *aloc;
3394 
3395   PetscFunctionBegin;
3396   if (!logkey_getlocalmatcondensed) {
3397     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
3398   }
3399   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3400   if (!row){
3401     start = a->rstart; end = a->rend;
3402     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3403   } else {
3404     isrowa = *row;
3405   }
3406   if (!col){
3407     start = a->cstart;
3408     cmap  = a->garray;
3409     nzA   = a->A->n;
3410     nzB   = a->B->n;
3411     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3412     ncols = 0;
3413     for (i=0; i<nzB; i++) {
3414       if (cmap[i] < start) idx[ncols++] = cmap[i];
3415       else break;
3416     }
3417     imark = i;
3418     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3419     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3420     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3421     ierr = PetscFree(idx);CHKERRQ(ierr);
3422   } else {
3423     iscola = *col;
3424   }
3425   if (scall != MAT_INITIAL_MATRIX){
3426     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3427     aloc[0] = *A_loc;
3428   }
3429   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3430   *A_loc = aloc[0];
3431   ierr = PetscFree(aloc);CHKERRQ(ierr);
3432   if (!row){
3433     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3434   }
3435   if (!col){
3436     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3437   }
3438   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3439   PetscFunctionReturn(0);
3440 }
3441 
3442 static PetscEvent logkey_GetBrowsOfAcols = 0;
3443 #undef __FUNCT__
3444 #define __FUNCT__ "MatGetBrowsOfAcols"
3445 /*@C
3446     MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of the local part of A
3447 
3448     Collective on Mat
3449 
3450    Input Parameters:
3451 +    A,B - the matrices in mpiaij format
3452 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3453 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3454 
3455    Output Parameter:
3456 +    rowb, colb - index sets of rows and columns of B to extract
3457 .    brstart - row index of B_seq at which B's own local rows begin (the next B->m rows of B_seq are taken from B's local rows)
3458 -    B_seq - the sequential matrix generated
3459 
3460     Level: developer
3461 
3462 @*/
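/*
   A minimal usage sketch (not part of the library source; A and B are assumed to be MPIAIJ
   matrices whose column and row layouts match, as checked at the top of the routine):

      IS       rowb,colb;
      PetscInt brstart;
      Mat      B_seq;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      ...  rowb, colb and B_seq can be passed back later with MAT_REUSE_MATRIX  ...
      ierr = ISDestroy(rowb);CHKERRQ(ierr);
      ierr = ISDestroy(colb);CHKERRQ(ierr);
      ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/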
3463 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3464 {
3465   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3466   PetscErrorCode    ierr;
3467   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3468   IS                isrowb,iscolb;
3469   Mat               *bseq;
3470 
3471   PetscFunctionBegin;
3472   if (a->cstart != b->rstart || a->cend != b->rend){
3473     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3474   }
3475   if (!logkey_GetBrowsOfAcols) {
3476     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
3477   }
3478   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3479 
3480   if (scall == MAT_INITIAL_MATRIX){
3481     start = a->cstart;
3482     cmap  = a->garray;
3483     nzA   = a->A->n;
3484     nzB   = a->B->n;
3485     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3486     ncols = 0;
3487     for (i=0; i<nzB; i++) {  /* row < local row index */
3488       if (cmap[i] < start) idx[ncols++] = cmap[i];
3489       else break;
3490     }
3491     imark = i;
3492     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3493     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3494     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3495     ierr = PetscFree(idx);CHKERRQ(ierr);
3496     *brstart = imark;
3497     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3498   } else {
3499     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3500     isrowb = *rowb; iscolb = *colb;
3501     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3502     bseq[0] = *B_seq;
3503   }
3504   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3505   *B_seq = bseq[0];
3506   ierr = PetscFree(bseq);CHKERRQ(ierr);
3507   if (!rowb){
3508     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3509   } else {
3510     *rowb = isrowb;
3511   }
3512   if (!colb){
3513     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3514   } else {
3515     *colb = iscolb;
3516   }
3517   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3518   PetscFunctionReturn(0);
3519 }
3520 
3521 static PetscEvent logkey_GetBrowsOfAocols = 0;
3522 #undef __FUNCT__
3523 #define __FUNCT__ "MatGetBrowsOfAoCols"
3524 /*@C
3525     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
3526     of the OFF-DIAGONAL portion of the local part of A
3527 
3528     Collective on Mat
3529 
3530    Input Parameters:
3531 +    A,B - the matrices in mpiaij format
3532 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3533 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3534 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3535 
3536    Output Parameter:
3537 .    B_oth - the sequential matrix generated
3538 
3539     Level: developer
3540 
3541 @*/
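/*
   A minimal usage sketch (not part of the library source; A and B are assumed to be MPIAIJ
   matrices whose column and row layouts match; passing PETSC_NULL for startsj and bufa_ptr
   forces the MAT_INITIAL_MATRIX path on every call):

      Mat B_oth;
      ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&B_oth);CHKERRQ(ierr);
      ...
      ierr = MatDestroy(B_oth);CHKERRQ(ierr);
*/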
3542 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3543 {
3544   VecScatter_MPI_General *gen_to,*gen_from;
3545   PetscErrorCode    ierr;
3546   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3547   Mat_SeqAIJ        *b_oth;
3548   VecScatter        ctx=a->Mvctx;
3549   MPI_Comm          comm=ctx->comm;
3550   PetscMPIInt       *rprocs,*sprocs,tag=ctx->tag,rank;
3551   PetscInt          *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
3552   PetscScalar       *rvalues,*svalues,*b_otha,*bufa,*bufA;
3553   PetscInt          i,k,l,nrecvs,nsends,nrows,*rrow,*srow,*rstarts,*rstartsj,*sstarts,*sstartsj,len;
3554   MPI_Request       *rwaits,*swaits;
3555   MPI_Status        *sstatus,rstatus;
3556   const PetscInt    *cols;
3557   const PetscScalar *vals;
3558   PetscMPIInt       j;
3559 
3560   PetscFunctionBegin;
3561   if (a->cstart != b->rstart || a->cend != b->rend){
3562     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
3563   }
3564   if (!logkey_GetBrowsOfAocols) {
3565     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
3566   }
3567   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3568   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3569 
3570   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3571   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3572   rvalues  = gen_from->values; /* used below to receive the lengths of incoming rows */
3573   svalues  = gen_to->values;   /* used below to pack and send the lengths of outgoing rows */
3574   nrecvs   = gen_from->n;
3575   nsends   = gen_to->n;
3576   rwaits   = gen_from->requests;
3577   swaits   = gen_to->requests;
3578   rrow     = gen_from->indices; /* local row index to be received */
3579   srow     = gen_to->indices;   /* local row index to be sent */
3580   rstarts  = gen_from->starts;
3581   sstarts  = gen_to->starts;
3582   rprocs   = gen_from->procs;
3583   sprocs   = gen_to->procs;
3584   sstatus  = gen_to->sstatus;
3585 
3586   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3587   if (scall == MAT_INITIAL_MATRIX){
3588     /* i-array */
3589     /*---------*/
3590     /*  post receives */
3591     for (i=0; i<nrecvs; i++){
3592       rowlen = (PetscInt*)rvalues + rstarts[i];
3593       nrows = rstarts[i+1]-rstarts[i];
3594       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3595     }
3596 
3597     /* pack the outgoing message */
3598     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3599     rstartsj = sstartsj + nsends +1;
3600     sstartsj[0] = 0;  rstartsj[0] = 0;
3601     len = 0; /* total length of j or a array to be sent */
3602     k = 0;
3603     for (i=0; i<nsends; i++){
3604       rowlen = (PetscInt*)svalues + sstarts[i];
3605       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3606       for (j=0; j<nrows; j++) {
3607         row = srow[k] + b->rowners[rank]; /* global row idx */
3608         ierr = MatGetRow(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3609         len += rowlen[j];
3610         ierr = MatRestoreRow(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3611         k++;
3612       }
3613       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3614       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3615     }
3616     /* recvs and sends of i-array are completed */
3617     i = nrecvs;
3618     while (i--) {
3619       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3620     }
3621     if (nsends) {
3622       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3623     }
3624     /* allocate buffers for sending j and a arrays */
3625     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3626     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3627 
3628     /* create i-array of B_oth */
3629     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3630     b_othi[0] = 0;
3631     len = 0; /* total length of j or a array to be received */
3632     k = 0;
3633     for (i=0; i<nrecvs; i++){
3634       rowlen = (PetscInt*)rvalues + rstarts[i];
3635       nrows = rstarts[i+1]-rstarts[i];
3636       for (j=0; j<nrows; j++) {
3637         b_othi[k+1] = b_othi[k] + rowlen[j];
3638         len += rowlen[j]; k++;
3639       }
3640       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3641     }
3642 
3643     /* allocate space for j and a arrays of B_oth */
3644     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3645     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3646 
3647     /* j-array */
3648     /*---------*/
3649     /*  post receives of j-array */
3650     for (i=0; i<nrecvs; i++){
3651       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3652       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3653     }
3654     k = 0;
3655     for (i=0; i<nsends; i++){
3656       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3657       bufJ = bufj+sstartsj[i];
3658       for (j=0; j<nrows; j++) {
3659         row  = srow[k++] + b->rowners[rank]; /* global row idx */
3660         ierr = MatGetRow(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3661         for (l=0; l<ncols; l++){
3662           *bufJ++ = cols[l];
3663         }
3664         ierr = MatRestoreRow(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3665       }
3666       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3667     }
3668 
3669     /* recvs and sends of j-array are completed */
3670     i = nrecvs;
3671     while (i--) {
3672       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3673     }
3674     if (nsends) {
3675       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3676     }
3677   } else if (scall == MAT_REUSE_MATRIX){
3678     sstartsj = *startsj;
3679     rstartsj = sstartsj + nsends +1;
3680     bufa     = *bufa_ptr;
3681     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3682     b_otha   = b_oth->a;
3683   } else {
3684     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3685   }
3686 
3687   /* a-array */
3688   /*---------*/
3689   /*  post receives of a-array */
3690   for (i=0; i<nrecvs; i++){
3691     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3692     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3693   }
3694   k = 0;
3695   for (i=0; i<nsends; i++){
3696     nrows = sstarts[i+1]-sstarts[i];
3697     bufA = bufa+sstartsj[i];
3698     for (j=0; j<nrows; j++) {
3699       row  = srow[k++] + b->rowners[rank]; /* global row idx */
3700       ierr = MatGetRow(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3701       for (l=0; l<ncols; l++){
3702         *bufA++ = vals[l];
3703       }
3704       ierr = MatRestoreRow(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3705 
3706     }
3707     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3708   }
3709   /* recvs and sends of a-array are completed */
3710   i = nrecvs;
3711   while (i--) {
3712     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3713   }
3714   if (nsends) {
3715     ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3716   }
3717 
3718   if (scall == MAT_INITIAL_MATRIX){
3719     /* put together the new matrix */
3720     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3721 
3722     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3723     /* Since these are PETSc arrays, change flags to free them as necessary. */
3724     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3725     b_oth->freedata = PETSC_TRUE;
3726     b_oth->nonew    = 0;
3727 
3728     ierr = PetscFree(bufj);CHKERRQ(ierr);
3729     if (!startsj || !bufa_ptr){
3730       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3731       ierr = PetscFree(bufa);CHKERRQ(ierr); /* caller did not ask for the buffer, so free it here */
3732     } else {
3733       *startsj  = sstartsj;
3734       *bufa_ptr = bufa;
3735     }
3736   }
3737   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3738 
3739   PetscFunctionReturn(0);
3740 }
3741 
3742 /*MC
3743    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3744 
3745    Options Database Keys:
3746 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3747 
3748   Level: beginner
3749 
3750 .seealso: MatCreateMPIAIJ
3751 M*/
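/*
   A minimal usage sketch (not part of the library source) showing the two common ways of
   selecting this matrix type for a matrix A:

      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);        hard-wired in the code, or
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);           chosen at run time with -mat_type mpiaij
*/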
3752 
3753 EXTERN_C_BEGIN
3754 #undef __FUNCT__
3755 #define __FUNCT__ "MatCreate_MPIAIJ"
3756 PetscErrorCode MatCreate_MPIAIJ(Mat B)
3757 {
3758   Mat_MPIAIJ     *b;
3759   PetscErrorCode ierr;
3760   PetscInt       i;
3761   PetscMPIInt    size;
3762 
3763   PetscFunctionBegin;
3764   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3765 
3766   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3767   B->data         = (void*)b;
3768   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
3769   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3770   B->factor       = 0;
3771   B->assembled    = PETSC_FALSE;
3772   B->mapping      = 0;
3773 
3774   B->insertmode      = NOT_SET_VALUES;
3775   b->size            = size;
3776   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3777 
3778   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
3779   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
3780 
3781   /* the information in the maps duplicates the information computed below, eventually
3782      we should remove the duplicate information that is not contained in the maps */
3783   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
3784   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
3785 
3786   /* build local table of row and column ownerships */
3787   ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
3788   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
3789   b->cowners = b->rowners + b->size + 2;
3790   ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3791   b->rowners[0] = 0;
3792   for (i=2; i<=b->size; i++) {
3793     b->rowners[i] += b->rowners[i-1];
3794   }
3795   b->rstart = b->rowners[b->rank];
3796   b->rend   = b->rowners[b->rank+1];
3797   ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3798   b->cowners[0] = 0;
3799   for (i=2; i<=b->size; i++) {
3800     b->cowners[i] += b->cowners[i-1];
3801   }
3802   b->cstart = b->cowners[b->rank];
3803   b->cend   = b->cowners[b->rank+1];
3804 
3805   /* build cache for off array entries formed */
3806   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3807   b->donotstash  = PETSC_FALSE;
3808   b->colmap      = 0;
3809   b->garray      = 0;
3810   b->roworiented = PETSC_TRUE;
3811 
3812   /* stuff used for matrix vector multiply */
3813   b->lvec      = PETSC_NULL;
3814   b->Mvctx     = PETSC_NULL;
3815 
3816   /* stuff for MatGetRow() */
3817   b->rowindices   = 0;
3818   b->rowvalues    = 0;
3819   b->getrowactive = PETSC_FALSE;
3820 
3821   /* Explicitly create 2 MATSEQAIJ matrices. */
3822   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
3823   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3824   PetscLogObjectParent(B,b->A);
3825   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
3826   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3827   PetscLogObjectParent(B,b->B);
3828 
3829   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3830                                      "MatStoreValues_MPIAIJ",
3831                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3832   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3833                                      "MatRetrieveValues_MPIAIJ",
3834                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3835   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3836 				     "MatGetDiagonalBlock_MPIAIJ",
3837                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3838   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3839 				     "MatIsTranspose_MPIAIJ",
3840 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3841   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3842 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3843 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3844   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3845 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3846 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
3847   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3848 				     "MatDiagonalScaleLocal_MPIAIJ",
3849 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
3850   PetscFunctionReturn(0);
3851 }
3852 EXTERN_C_END
3853