xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 4c8c32f384357201972f9dd67b0f2e901704fd35)
1 
2 #include "src/mat/impls/aij/mpi/mpiaij.h"
3 #include "src/inline/spops.h"
4 
5 /*
6   Local utility routine that creates a mapping from the global column
7 number to the local number in the off-diagonal part of the local
8 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
9 a slightly higher hash table lookup cost; without it, it is not scalable (each
10 processor has an order N integer array) but access is fast.
11 */
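/*
  Usage sketch (illustrative only, mirroring the lookup done later in
  MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ()): once the colmap exists,
  a global column index gcol is translated to a local off-diagonal index by

#if defined (PETSC_USE_CTABLE)
      ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
      col--;
#else
      col = aij->colmap[gcol] - 1;
#endif

  A resulting col < 0 means the global column is not present in the
  off-diagonal part on this process.
*/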
12 #undef __FUNCT__
13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
15 {
16   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
17   PetscErrorCode ierr;
18   int        n = aij->B->n,i;
19 
20   PetscFunctionBegin;
21 #if defined (PETSC_USE_CTABLE)
22   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
23   for (i=0; i<n; i++){
24     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
25   }
26 #else
27   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
28   PetscLogObjectMemory(mat,mat->N*sizeof(int));
29   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
30   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
31 #endif
32   PetscFunctionReturn(0);
33 }
34 
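/*
  MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() below are
  inlined versions of MatSetValues_SeqAIJ() for the diagonal (A) and
  off-diagonal (B) blocks respectively.  Each macro locates the column within
  the given row (a short binary search followed by a linear scan), then either
  adds to or overwrites the existing entry.  If the entry is new and the row is
  full, the row storage is enlarged by CHUNKSIZE entries: the old i/j/a arrays
  are copied into a single new allocation and later entries in the row are
  shifted up to make room.
*/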
35 #define CHUNKSIZE   15
36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
37 { \
38  \
39     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
40     rmax = aimax[row]; nrow = ailen[row];  \
41     col1 = col - shift; \
42      \
43     low = 0; high = nrow; \
44     while (high-low > 5) { \
45       t = (low+high)/2; \
46       if (rp[t] > col1) high = t; \
47       else             low  = t; \
48     } \
49       for (_i=low; _i<high; _i++) { \
50         if (rp[_i] > col1) break; \
51         if (rp[_i] == col1) { \
52           if (addv == ADD_VALUES) ap[_i] += value;   \
53           else                  ap[_i] = value; \
54           goto a_noinsert; \
55         } \
56       }  \
57       if (nonew == 1) goto a_noinsert; \
58       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
59       if (nrow >= rmax) { \
60         /* there is no extra room in row, therefore enlarge */ \
61         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
62         PetscScalar *new_a; \
63  \
64         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
65  \
66         /* malloc new storage space */ \
67         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
68         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
69         new_j   = (int*)(new_a + new_nz); \
70         new_i   = new_j + new_nz; \
71  \
72         /* copy over old data into new slots */ \
73         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
74         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
75         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
76         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
77         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
78                                                            len*sizeof(int));CHKERRQ(ierr); \
79         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
80         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
81                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
82         /* free up old matrix storage */ \
83  \
84         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
85         if (!a->singlemalloc) { \
86            ierr = PetscFree(a->i);CHKERRQ(ierr); \
87            ierr = PetscFree(a->j);CHKERRQ(ierr); \
88         } \
89         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
90         a->singlemalloc = PETSC_TRUE; \
91  \
92         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
93         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
94         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
95         a->maxnz += CHUNKSIZE; \
96         a->reallocs++; \
97       } \
98       N = nrow++ - 1; a->nz++; \
99       /* shift up all the later entries in this row */ \
100       for (ii=N; ii>=_i; ii--) { \
101         rp[ii+1] = rp[ii]; \
102         ap[ii+1] = ap[ii]; \
103       } \
104       rp[_i] = col1;  \
105       ap[_i] = value;  \
106       a_noinsert: ; \
107       ailen[row] = nrow; \
108 }
109 
110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
111 { \
112  \
113     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
114     rmax = bimax[row]; nrow = bilen[row];  \
115     col1 = col - shift; \
116      \
117     low = 0; high = nrow; \
118     while (high-low > 5) { \
119       t = (low+high)/2; \
120       if (rp[t] > col1) high = t; \
121       else             low  = t; \
122     } \
123        for (_i=low; _i<high; _i++) { \
124         if (rp[_i] > col1) break; \
125         if (rp[_i] == col1) { \
126           if (addv == ADD_VALUES) ap[_i] += value;   \
127           else                  ap[_i] = value; \
128           goto b_noinsert; \
129         } \
130       }  \
131       if (nonew == 1) goto b_noinsert; \
132       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
133       if (nrow >= rmax) { \
134         /* there is no extra room in row, therefore enlarge */ \
135         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
136         PetscScalar *new_a; \
137  \
138         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
139  \
140         /* malloc new storage space */ \
141         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
142         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
143         new_j   = (int*)(new_a + new_nz); \
144         new_i   = new_j + new_nz; \
145  \
146         /* copy over old data into new slots */ \
147         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
148         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
149         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
150         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
151         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
152                                                            len*sizeof(int));CHKERRQ(ierr); \
153         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
154         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
155                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
156         /* free up old matrix storage */ \
157  \
158         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
159         if (!b->singlemalloc) { \
160           ierr = PetscFree(b->i);CHKERRQ(ierr); \
161           ierr = PetscFree(b->j);CHKERRQ(ierr); \
162         } \
163         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
164         b->singlemalloc = PETSC_TRUE; \
165  \
166         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
167         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
168         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
169         b->maxnz += CHUNKSIZE; \
170         b->reallocs++; \
171       } \
172       N = nrow++ - 1; b->nz++; \
173       /* shift up all the later entries in this row */ \
174       for (ii=N; ii>=_i; ii--) { \
175         rp[ii+1] = rp[ii]; \
176         ap[ii+1] = ap[ii]; \
177       } \
178       rp[_i] = col1;  \
179       ap[_i] = value;  \
180       b_noinsert: ; \
181       bilen[row] = nrow; \
182 }
183 
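/*
  MatSetValues_MPIAIJ() routes each (row,column) pair to the correct storage:
  locally owned rows go into the diagonal block A (columns in [cstart,cend))
  or into the off-diagonal block B (all other columns, translated through
  colmap), while rows owned by other processes are placed in the stash and
  communicated during assembly.

  A minimal calling sketch from user code (sizes and values illustrative):

      PetscScalar v[2]    = {1.0,2.0};
      int         row     = 0, cols[2] = {0,5};
      ierr = MatSetValues(mat,1,&row,2,cols,v,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/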
184 #undef __FUNCT__
185 #define __FUNCT__ "MatSetValues_MPIAIJ"
186 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
187 {
188   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
189   PetscScalar  value;
190   PetscErrorCode ierr;
191   int          i,j,rstart = aij->rstart,rend = aij->rend;
192   int          cstart = aij->cstart,cend = aij->cend,row,col;
193   PetscTruth   roworiented = aij->roworiented;
194 
195   /* Some Variables required in the macro */
196   Mat          A = aij->A;
197   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
198   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
199   PetscScalar  *aa = a->a;
200   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
201   Mat          B = aij->B;
202   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
203   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
204   PetscScalar  *ba = b->a;
205 
206   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
207   int          nonew = a->nonew,shift=0;
208   PetscScalar  *ap;
209 
210   PetscFunctionBegin;
211   for (i=0; i<m; i++) {
212     if (im[i] < 0) continue;
213 #if defined(PETSC_USE_BOPT_g)
214     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
215 #endif
216     if (im[i] >= rstart && im[i] < rend) {
217       row = im[i] - rstart;
218       for (j=0; j<n; j++) {
219         if (in[j] >= cstart && in[j] < cend){
220           col = in[j] - cstart;
221           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
222           if (ignorezeroentries && value == 0.0) continue;
223           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
224           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
225         } else if (in[j] < 0) continue;
226 #if defined(PETSC_USE_BOPT_g)
227         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
228 #endif
229         else {
230           if (mat->was_assembled) {
231             if (!aij->colmap) {
232               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
233             }
234 #if defined (PETSC_USE_CTABLE)
235             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
236 	    col--;
237 #else
238             col = aij->colmap[in[j]] - 1;
239 #endif
240             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
241               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
242               col =  in[j];
243               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
244               B = aij->B;
245               b = (Mat_SeqAIJ*)B->data;
246               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
247               ba = b->a;
248             }
249           } else col = in[j];
250           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
251           if (ignorezeroentries && value == 0.0) continue;
252           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
253           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
254         }
255       }
256     } else {
257       if (!aij->donotstash) {
258         if (roworiented) {
259           if (ignorezeroentries && v[i*n] == 0.0) continue;
260           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
261         } else {
262           if (ignorezeroentries && v[i] == 0.0) continue;
263           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
264         }
265       }
266     }
267   }
268   PetscFunctionReturn(0);
269 }
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatGetValues_MPIAIJ"
273 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
274 {
275   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
276   PetscErrorCode ierr;
277   int        i,j,rstart = aij->rstart,rend = aij->rend;
278   int        cstart = aij->cstart,cend = aij->cend,row,col;
279 
280   PetscFunctionBegin;
281   for (i=0; i<m; i++) {
282     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
283     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
284     if (idxm[i] >= rstart && idxm[i] < rend) {
285       row = idxm[i] - rstart;
286       for (j=0; j<n; j++) {
287         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
288         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
289         if (idxn[j] >= cstart && idxn[j] < cend){
290           col = idxn[j] - cstart;
291           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
292         } else {
293           if (!aij->colmap) {
294             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
295           }
296 #if defined (PETSC_USE_CTABLE)
297           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
298           col --;
299 #else
300           col = aij->colmap[idxn[j]] - 1;
301 #endif
302           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
303           else {
304             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
305           }
306         }
307       }
308     } else {
309       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
310     }
311   }
312   PetscFunctionReturn(0);
313 }
314 
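/*
  Assembly works in two phases.  MatAssemblyBegin_MPIAIJ() checks that all
  processes use the same InsertMode and starts scattering the stashed
  off-process entries to their owners.  MatAssemblyEnd_MPIAIJ() receives the
  stashed entries and feeds them through MatSetValues_MPIAIJ(), assembles the
  local blocks A and B, disassembles if any other process did, and on the
  first final assembly builds the ghost-column scatter via
  MatSetUpMultiply_MPIAIJ().
*/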
315 #undef __FUNCT__
316 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
317 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
318 {
319   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
320   PetscErrorCode ierr;
321   int         nstash,reallocs;
322   InsertMode  addv;
323 
324   PetscFunctionBegin;
325   if (aij->donotstash) {
326     PetscFunctionReturn(0);
327   }
328 
329   /* make sure all processors are in the same mode, either INSERT_VALUES or ADD_VALUES */
330   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
331   if (addv == (ADD_VALUES|INSERT_VALUES)) {
332     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
333   }
334   mat->insertmode = addv; /* in case this processor had no cache */
335 
336   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
337   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
338   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
339   PetscFunctionReturn(0);
340 }
341 
342 
343 #undef __FUNCT__
344 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
345 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
346 {
347   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
348   Mat_SeqAIJ  *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
349   PetscErrorCode ierr;
350   int         i,j,rstart,ncols,n,flg;
351   int         *row,*col,other_disassembled;
352   PetscScalar *val;
353   InsertMode  addv = mat->insertmode;
354 
355   PetscFunctionBegin;
356   if (!aij->donotstash) {
357     while (1) {
358       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
359       if (!flg) break;
360 
361       for (i=0; i<n;) {
362         /* Now identify the consecutive vals belonging to the same row */
363         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
364         if (j < n) ncols = j-i;
365         else       ncols = n-i;
366         /* Now assemble all these values with a single function call */
367         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
368         i = j;
369       }
370     }
371     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
372   }
373 
374   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
375   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
376 
377   /* determine if any processor has disassembled; if so, we must
378      also disassemble ourselves, so that we may reassemble. */
379   /*
380      if nonzero structure of submatrix B cannot change then we know that
381      no processor disassembled thus we can skip this stuff
382   */
383   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
384     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
385     if (mat->was_assembled && !other_disassembled) {
386       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
387     }
388   }
389 
390   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
391     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
392   }
393   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
394   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
395 
396   if (aij->rowvalues) {
397     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
398     aij->rowvalues = 0;
399   }
400 
401   /* used by MatAXPY() */
402   a->xtoy = 0; b->xtoy = 0;
403   a->XtoY = 0; b->XtoY = 0;
404 
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
410 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
411 {
412   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
413   PetscErrorCode ierr;
414 
415   PetscFunctionBegin;
416   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
417   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
418   PetscFunctionReturn(0);
419 }
420 
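/*
  MatZeroRows_MPIAIJ() accepts global row indices that may be owned by any
  process.  Each process first determines the owner of every index it was
  given, sends those indices to their owners (MPI_Isend/MPI_Irecv), and
  collects the rows it owns itself into lrows.  The local rows are then
  zeroed, B first and A second; if a diagonal value is requested it is either
  handled by MatZeroRows() on the square diagonal block or set explicitly
  with MatSetValues() followed by a final assembly.
*/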
421 #undef __FUNCT__
422 #define __FUNCT__ "MatZeroRows_MPIAIJ"
423 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
424 {
425   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
426   PetscErrorCode ierr;
427   int            i,N,*rows,*owners = l->rowners,size = l->size;
428   int            *nprocs,j,idx,nsends,row;
429   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
430   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
431   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
432   MPI_Comm       comm = A->comm;
433   MPI_Request    *send_waits,*recv_waits;
434   MPI_Status     recv_status,*send_status;
435   IS             istmp;
436   PetscTruth     found;
437 
438   PetscFunctionBegin;
439   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
440   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
441 
442   /*  first count number of contributors to each processor */
443   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
444   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
445   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
446   for (i=0; i<N; i++) {
447     idx = rows[i];
448     found = PETSC_FALSE;
449     for (j=0; j<size; j++) {
450       if (idx >= owners[j] && idx < owners[j+1]) {
451         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
452       }
453     }
454     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
455   }
456   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
457 
458   /* inform other processors of number of messages and max length*/
459   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
460 
461   /* post receives:   */
462   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
463   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
464   for (i=0; i<nrecvs; i++) {
465     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
466   }
467 
468   /* do sends:
469       1) starts[i] gives the starting index in svalues for stuff going to
470          the ith processor
471   */
472   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
473   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
474   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
475   starts[0] = 0;
476   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
477   for (i=0; i<N; i++) {
478     svalues[starts[owner[i]]++] = rows[i];
479   }
480   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
481 
482   starts[0] = 0;
483   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
484   count = 0;
485   for (i=0; i<size; i++) {
486     if (nprocs[2*i+1]) {
487       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
488     }
489   }
490   ierr = PetscFree(starts);CHKERRQ(ierr);
491 
492   base = owners[rank];
493 
494   /*  wait on receives */
495   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
496   source = lens + nrecvs;
497   count  = nrecvs; slen = 0;
498   while (count) {
499     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
500     /* unpack receives into our local space */
501     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
502     source[imdex]  = recv_status.MPI_SOURCE;
503     lens[imdex]    = n;
504     slen          += n;
505     count--;
506   }
507   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
508 
509   /* move the data into the send scatter */
510   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
511   count = 0;
512   for (i=0; i<nrecvs; i++) {
513     values = rvalues + i*nmax;
514     for (j=0; j<lens[i]; j++) {
515       lrows[count++] = values[j] - base;
516     }
517   }
518   ierr = PetscFree(rvalues);CHKERRQ(ierr);
519   ierr = PetscFree(lens);CHKERRQ(ierr);
520   ierr = PetscFree(owner);CHKERRQ(ierr);
521   ierr = PetscFree(nprocs);CHKERRQ(ierr);
522 
523   /* actually zap the local rows */
524   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
525   PetscLogObjectParent(A,istmp);
526 
527   /*
528         Zero the required rows. If the "diagonal block" of the matrix
529      is square and the user wishes to set the diagonal, we use separate
530      code so that MatSetValues() is not called for each diagonal entry, which
531      would allocate new memory, causing many mallocs and slowing things down.
532 
533        Contributed by: Mathew Knepley
534   */
535   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
536   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
537   if (diag && (l->A->M == l->A->N)) {
538     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
539   } else if (diag) {
540     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
541     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
542       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
543 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
544     }
545     for (i = 0; i < slen; i++) {
546       row  = lrows[i] + rstart;
547       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
548     }
549     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
550     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
551   } else {
552     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
553   }
554   ierr = ISDestroy(istmp);CHKERRQ(ierr);
555   ierr = PetscFree(lrows);CHKERRQ(ierr);
556 
557   /* wait on sends */
558   if (nsends) {
559     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
560     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
561     ierr = PetscFree(send_status);CHKERRQ(ierr);
562   }
563   ierr = PetscFree(send_waits);CHKERRQ(ierr);
564   ierr = PetscFree(svalues);CHKERRQ(ierr);
565 
566   PetscFunctionReturn(0);
567 }
568 
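/*
  The multiply kernels below compute y = A_d*x_local + B_o*x_ghost, where A_d
  is the diagonal block, B_o the off-diagonal block, and x_ghost the needed
  off-process entries of x gathered into a->lvec.  The scatter that fills
  a->lvec is started before the local product and completed just before the
  off-diagonal product, overlapping communication with computation.
*/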
569 #undef __FUNCT__
570 #define __FUNCT__ "MatMult_MPIAIJ"
571 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
572 {
573   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
574   PetscErrorCode ierr;
575   int        nt;
576 
577   PetscFunctionBegin;
578   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
579   if (nt != A->n) {
580     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
581   }
582   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
583   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
584   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
585   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
586   PetscFunctionReturn(0);
587 }
588 
589 #undef __FUNCT__
590 #define __FUNCT__ "MatMultAdd_MPIAIJ"
591 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
592 {
593   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
594   PetscErrorCode ierr;
595 
596   PetscFunctionBegin;
597   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
598   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
599   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
600   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
601   PetscFunctionReturn(0);
602 }
603 
604 #undef __FUNCT__
605 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
606 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
607 {
608   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
609   PetscErrorCode ierr;
610 
611   PetscFunctionBegin;
612   /* do nondiagonal part */
613   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
614   /* send it on its way */
615   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
616   /* do local part */
617   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
618   /* receive remote parts: note this assumes the values are not actually */
619   /* inserted in yy until the next line, which is true for this implementation */
620   /* but may not always be true. */
621   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
622   PetscFunctionReturn(0);
623 }
624 
625 EXTERN_C_BEGIN
626 #undef __FUNCT__
627 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
628 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
629 {
630   MPI_Comm comm;
631   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
632   Mat        Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
633   IS         Me,Notme;
634   PetscErrorCode ierr;
635   int        M,N,first,last,*notme,ntids,i;
636 
637   PetscFunctionBegin;
638 
639   /* Easy test: symmetric diagonal block */
640   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
641   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
642   if (!*f) PetscFunctionReturn(0);
643   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
644   ierr = MPI_Comm_size(comm,&ntids);CHKERRQ(ierr);
645   if (ntids==1) PetscFunctionReturn(0);
646 
647   /* Hard test: off-diagonal block. This requires a call to MatGetSubMatrices(). */
648   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
649   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
650   ierr = PetscMalloc((N-last+first)*sizeof(int),&notme);CHKERRQ(ierr);
651   for (i=0; i<first; i++) notme[i] = i;
652   for (i=last; i<M; i++) notme[i-last+first] = i;
653   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
654   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
655   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
656   Aoff = Aoffs[0];
657   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
658   Boff = Boffs[0];
659   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
660   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
661   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
662   ierr = ISDestroy(Me);CHKERRQ(ierr);
663   ierr = ISDestroy(Notme);CHKERRQ(ierr);
664 
665   PetscFunctionReturn(0);
666 }
667 EXTERN_C_END
668 
669 #undef __FUNCT__
670 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
671 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
672 {
673   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
674   PetscErrorCode ierr;
675 
676   PetscFunctionBegin;
677   /* do nondiagonal part */
678   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
679   /* send it on its way */
680   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
681   /* do local part */
682   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
683   /* receive remote parts: note this assumes the values are not actually */
684   /* inserted in zz until the next line, which is true for this implementation */
685   /* but may not always be true. */
686   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
687   PetscFunctionReturn(0);
688 }
689 
690 /*
691   This only works correctly for square matrices where the subblock A->A is the
692    diagonal block
693 */
694 #undef __FUNCT__
695 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
696 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
697 {
698   PetscErrorCode ierr;
699   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
700 
701   PetscFunctionBegin;
702   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
703   if (a->rstart != a->cstart || a->rend != a->cend) {
704     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
705   }
706   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
707   PetscFunctionReturn(0);
708 }
709 
710 #undef __FUNCT__
711 #define __FUNCT__ "MatScale_MPIAIJ"
712 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
713 {
714   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
715   PetscErrorCode ierr;
716 
717   PetscFunctionBegin;
718   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
719   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
720   PetscFunctionReturn(0);
721 }
722 
723 #undef __FUNCT__
724 #define __FUNCT__ "MatDestroy_MPIAIJ"
725 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
726 {
727   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
728   PetscErrorCode ierr;
729 
730   PetscFunctionBegin;
731 #if defined(PETSC_USE_LOG)
732   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
733 #endif
734   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
735   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
736   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
737   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
738 #if defined (PETSC_USE_CTABLE)
739   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
740 #else
741   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
742 #endif
743   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
744   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
745   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
746   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
747   ierr = PetscFree(aij);CHKERRQ(ierr);
748   PetscFunctionReturn(0);
749 }
750 
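/*
  MatView_MPIAIJ_Binary() writes the parallel matrix in the standard PETSc
  binary format: a header (MAT_FILE_COOKIE, M, N, global nonzero count),
  followed by all row lengths, then all global column indices, then all
  values.  Process 0 writes its own data first and then receives each other
  process's rows in rank order, so the file layout matches that of a
  sequential AIJ matrix with the same entries.
*/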
751 #undef __FUNCT__
752 #define __FUNCT__ "MatView_MPIAIJ_Binary"
753 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
754 {
755   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
756   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
757   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
758   PetscErrorCode ierr;
759   int               nz,fd,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
760   int               nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
761   PetscScalar       *column_values;
762 
763   PetscFunctionBegin;
764   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
765   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
766   nz   = A->nz + B->nz;
767   if (!rank) {
768     header[0] = MAT_FILE_COOKIE;
769     header[1] = mat->M;
770     header[2] = mat->N;
771     ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
772     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
773     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
774     /* get largest number of rows any processor has */
775     rlen = mat->m;
776     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
777     for (i=1; i<size; i++) {
778       rlen = PetscMax(rlen,range[i+1] - range[i]);
779     }
780   } else {
781     ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
782     rlen = mat->m;
783   }
784 
785   /* load up the local row counts */
786   ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
787   for (i=0; i<mat->m; i++) {
788     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
789   }
790 
791   /* store the row lengths to the file */
792   if (!rank) {
793     MPI_Status status;
794     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
795     for (i=1; i<size; i++) {
796       rlen = range[i+1] - range[i];
797       ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
798       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
799     }
800   } else {
801     ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
802   }
803   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
804 
805   /* load up the local column indices */
806   nzmax = nz; /* 0th processor needs space as large as the largest processor needs */
807   ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
808   ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
809   cnt  = 0;
810   for (i=0; i<mat->m; i++) {
811     for (j=B->i[i]; j<B->i[i+1]; j++) {
812       if ( (col = garray[B->j[j]]) > cstart) break;
813       column_indices[cnt++] = col;
814     }
815     for (k=A->i[i]; k<A->i[i+1]; k++) {
816       column_indices[cnt++] = A->j[k] + cstart;
817     }
818     for (; j<B->i[i+1]; j++) {
819       column_indices[cnt++] = garray[B->j[j]];
820     }
821   }
822   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
823 
824   /* store the column indices to the file */
825   if (!rank) {
826     MPI_Status status;
827     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
828     for (i=1; i<size; i++) {
829       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
830       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
831       ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
832       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
833     }
834   } else {
835     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
836     ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
837   }
838   ierr = PetscFree(column_indices);CHKERRQ(ierr);
839 
840   /* load up the local column values */
841   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
842   cnt  = 0;
843   for (i=0; i<mat->m; i++) {
844     for (j=B->i[i]; j<B->i[i+1]; j++) {
845       if ( garray[B->j[j]] > cstart) break;
846       column_values[cnt++] = B->a[j];
847     }
848     for (k=A->i[i]; k<A->i[i+1]; k++) {
849       column_values[cnt++] = A->a[k];
850     }
851     for (; j<B->i[i+1]; j++) {
852       column_values[cnt++] = B->a[j];
853     }
854   }
855   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
856 
857   /* store the column values to the file */
858   if (!rank) {
859     MPI_Status status;
860     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
861     for (i=1; i<size; i++) {
862       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
863       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
864       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
865       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
866     }
867   } else {
868     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
869     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
870   }
871   ierr = PetscFree(column_values);CHKERRQ(ierr);
872   PetscFunctionReturn(0);
873 }
874 
875 #undef __FUNCT__
876 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
877 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
878 {
879   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
880   PetscErrorCode ierr;
881   int               rank = aij->rank,size = aij->size;
882   PetscTruth        isdraw,iascii,flg,isbinary;
883   PetscViewer       sviewer;
884   PetscViewerFormat format;
885 
886   PetscFunctionBegin;
887   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
888   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
889   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
890   if (iascii) {
891     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
892     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
893       MatInfo info;
894       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
895       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
896       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
897       if (flg) {
898         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
899 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
900       } else {
901         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
902 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
903       }
904       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
905       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
906       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
907       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
908       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
909       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
910       PetscFunctionReturn(0);
911     } else if (format == PETSC_VIEWER_ASCII_INFO) {
912       PetscFunctionReturn(0);
913     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
914       PetscFunctionReturn(0);
915     }
916   } else if (isbinary) {
917     if (size == 1) {
918       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
919       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
920     } else {
921       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
922     }
923     PetscFunctionReturn(0);
924   } else if (isdraw) {
925     PetscDraw  draw;
926     PetscTruth isnull;
927     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
928     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
929   }
930 
931   if (size == 1) {
932     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
933     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
934   } else {
935     /* assemble the entire matrix onto first processor. */
936     Mat         A;
937     Mat_SeqAIJ *Aloc;
938     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
939     PetscScalar *a;
940 
941     if (!rank) {
942       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
943     } else {
944       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
945     }
946     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
947     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
948     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
949     PetscLogObjectParent(mat,A);
950 
951     /* copy over the A part */
952     Aloc = (Mat_SeqAIJ*)aij->A->data;
953     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
954     row = aij->rstart;
955     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
956     for (i=0; i<m; i++) {
957       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
958       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
959     }
960     aj = Aloc->j;
961     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
962 
963     /* copy over the B part */
964     Aloc = (Mat_SeqAIJ*)aij->B->data;
965     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
966     row  = aij->rstart;
967     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
968     ct   = cols;
969     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
970     for (i=0; i<m; i++) {
971       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
972       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
973     }
974     ierr = PetscFree(ct);CHKERRQ(ierr);
975     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
976     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977     /*
978        Everyone has to call to draw the matrix since the graphics waits are
979        synchronized across all processors that share the PetscDraw object
980     */
981     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
982     if (!rank) {
983       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
984       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
985     }
986     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
987     ierr = MatDestroy(A);CHKERRQ(ierr);
988   }
989   PetscFunctionReturn(0);
990 }
991 
992 #undef __FUNCT__
993 #define __FUNCT__ "MatView_MPIAIJ"
994 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
995 {
996   PetscErrorCode ierr;
997   PetscTruth iascii,isdraw,issocket,isbinary;
998 
999   PetscFunctionBegin;
1000   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1001   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1002   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1003   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1004   if (iascii || isdraw || isbinary || issocket) {
1005     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1006   } else {
1007     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1008   }
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 
1013 
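/*
  MatRelax_MPIAIJ() supports only the SOR_LOCAL_* (processor-local) variants.
  For each outer iteration the current ghost values of xx are gathered into
  mat->lvec, the off-diagonal contribution is moved to the right-hand side
  (bb1 = bb - B*lvec, implemented by negating lvec and using multadd), and a
  sequential SOR sweep is applied to the diagonal block with bb1 as the
  right-hand side.  True parallel SOR is not supported.
*/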
1014 #undef __FUNCT__
1015 #define __FUNCT__ "MatRelax_MPIAIJ"
1016 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1017 {
1018   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1019   PetscErrorCode ierr;
1020   Vec          bb1;
1021   PetscScalar  mone=-1.0;
1022 
1023   PetscFunctionBegin;
1024   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1025 
1026   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1027 
1028   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1029     if (flag & SOR_ZERO_INITIAL_GUESS) {
1030       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1031       its--;
1032     }
1033 
1034     while (its--) {
1035       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1036       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1037 
1038       /* update rhs: bb1 = bb - B*x */
1039       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1040       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1041 
1042       /* local sweep */
1043       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1044       CHKERRQ(ierr);
1045     }
1046   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1047     if (flag & SOR_ZERO_INITIAL_GUESS) {
1048       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1049       its--;
1050     }
1051     while (its--) {
1052       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1053       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1054 
1055       /* update rhs: bb1 = bb - B*x */
1056       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1057       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1058 
1059       /* local sweep */
1060       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1061       CHKERRQ(ierr);
1062     }
1063   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1064     if (flag & SOR_ZERO_INITIAL_GUESS) {
1065       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1066       its--;
1067     }
1068     while (its--) {
1069       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1070       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1071 
1072       /* update rhs: bb1 = bb - B*x */
1073       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1074       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1075 
1076       /* local sweep */
1077       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1078       CHKERRQ(ierr);
1079     }
1080   } else {
1081     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1082   }
1083 
1084   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 #undef __FUNCT__
1089 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1090 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1091 {
1092   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1093   Mat        A = mat->A,B = mat->B;
1094   PetscErrorCode ierr;
1095   PetscReal  isend[5],irecv[5];
1096 
1097   PetscFunctionBegin;
1098   info->block_size     = 1.0;
1099   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1100   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1101   isend[3] = info->memory;  isend[4] = info->mallocs;
1102   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1103   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1104   isend[3] += info->memory;  isend[4] += info->mallocs;
1105   if (flag == MAT_LOCAL) {
1106     info->nz_used      = isend[0];
1107     info->nz_allocated = isend[1];
1108     info->nz_unneeded  = isend[2];
1109     info->memory       = isend[3];
1110     info->mallocs      = isend[4];
1111   } else if (flag == MAT_GLOBAL_MAX) {
1112     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1113     info->nz_used      = irecv[0];
1114     info->nz_allocated = irecv[1];
1115     info->nz_unneeded  = irecv[2];
1116     info->memory       = irecv[3];
1117     info->mallocs      = irecv[4];
1118   } else if (flag == MAT_GLOBAL_SUM) {
1119     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1120     info->nz_used      = irecv[0];
1121     info->nz_allocated = irecv[1];
1122     info->nz_unneeded  = irecv[2];
1123     info->memory       = irecv[3];
1124     info->mallocs      = irecv[4];
1125   }
1126   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1127   info->fill_ratio_needed = 0;
1128   info->factor_mallocs    = 0;
1129   info->rows_global       = (double)matin->M;
1130   info->columns_global    = (double)matin->N;
1131   info->rows_local        = (double)matin->m;
1132   info->columns_local     = (double)matin->N;
1133 
1134   PetscFunctionReturn(0);
1135 }
1136 
1137 #undef __FUNCT__
1138 #define __FUNCT__ "MatSetOption_MPIAIJ"
1139 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1140 {
1141   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1142   PetscErrorCode ierr;
1143 
1144   PetscFunctionBegin;
1145   switch (op) {
1146   case MAT_NO_NEW_NONZERO_LOCATIONS:
1147   case MAT_YES_NEW_NONZERO_LOCATIONS:
1148   case MAT_COLUMNS_UNSORTED:
1149   case MAT_COLUMNS_SORTED:
1150   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1151   case MAT_KEEP_ZEROED_ROWS:
1152   case MAT_NEW_NONZERO_LOCATION_ERR:
1153   case MAT_USE_INODES:
1154   case MAT_DO_NOT_USE_INODES:
1155   case MAT_IGNORE_ZERO_ENTRIES:
1156     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1157     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1158     break;
1159   case MAT_ROW_ORIENTED:
1160     a->roworiented = PETSC_TRUE;
1161     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1162     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1163     break;
1164   case MAT_ROWS_SORTED:
1165   case MAT_ROWS_UNSORTED:
1166   case MAT_YES_NEW_DIAGONALS:
1167     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1168     break;
1169   case MAT_COLUMN_ORIENTED:
1170     a->roworiented = PETSC_FALSE;
1171     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1172     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1173     break;
1174   case MAT_IGNORE_OFF_PROC_ENTRIES:
1175     a->donotstash = PETSC_TRUE;
1176     break;
1177   case MAT_NO_NEW_DIAGONALS:
1178     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1179   case MAT_SYMMETRIC:
1180   case MAT_STRUCTURALLY_SYMMETRIC:
1181   case MAT_HERMITIAN:
1182   case MAT_SYMMETRY_ETERNAL:
1183     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1184     break;
1185   case MAT_NOT_SYMMETRIC:
1186   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1187   case MAT_NOT_HERMITIAN:
1188   case MAT_NOT_SYMMETRY_ETERNAL:
1189     break;
1190   default:
1191     SETERRQ(PETSC_ERR_SUP,"unknown option");
1192   }
1193   PetscFunctionReturn(0);
1194 }
1195 
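/*
  MatGetRow_MPIAIJ() returns a locally owned row as a single sorted list of
  global columns and values.  It calls the getrow routines of the A and B
  blocks, maps B's local column indices to global ones through garray, and
  merges the two pieces: B entries with global column < cstart first, then
  the A entries shifted by cstart, then the remaining B entries.  Workspace
  large enough for the longest local row is allocated once and reused.
*/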
1196 #undef __FUNCT__
1197 #define __FUNCT__ "MatGetRow_MPIAIJ"
1198 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1199 {
1200   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1201   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1202   PetscErrorCode ierr;
1203   int          i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1204   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1205   int          *cmap,*idx_p;
1206 
1207   PetscFunctionBegin;
1208   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1209   mat->getrowactive = PETSC_TRUE;
1210 
1211   if (!mat->rowvalues && (idx || v)) {
1212     /*
1213         allocate enough space to hold information from the longest row.
1214     */
1215     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1216     int     max = 1,tmp;
1217     for (i=0; i<matin->m; i++) {
1218       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1219       if (max < tmp) { max = tmp; }
1220     }
1221     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1222     mat->rowindices = (int*)(mat->rowvalues + max);
1223   }
1224 
1225   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1226   lrow = row - rstart;
1227 
1228   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1229   if (!v)   {pvA = 0; pvB = 0;}
1230   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1231   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1232   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1233   nztot = nzA + nzB;
1234 
1235   cmap  = mat->garray;
1236   if (v  || idx) {
1237     if (nztot) {
1238       /* Sort by increasing column numbers, assuming A and B already sorted */
1239       int imark = -1;
1240       if (v) {
1241         *v = v_p = mat->rowvalues;
1242         for (i=0; i<nzB; i++) {
1243           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1244           else break;
1245         }
1246         imark = i;
1247         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1248         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1249       }
1250       if (idx) {
1251         *idx = idx_p = mat->rowindices;
1252         if (imark > -1) {
1253           for (i=0; i<imark; i++) {
1254             idx_p[i] = cmap[cworkB[i]];
1255           }
1256         } else {
1257           for (i=0; i<nzB; i++) {
1258             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1259             else break;
1260           }
1261           imark = i;
1262         }
1263         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1264         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1265       }
1266     } else {
1267       if (idx) *idx = 0;
1268       if (v)   *v   = 0;
1269     }
1270   }
1271   *nz = nztot;
1272   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1273   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 #undef __FUNCT__
1278 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1279 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1280 {
1281   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1282 
1283   PetscFunctionBegin;
1284   if (aij->getrowactive == PETSC_FALSE) {
1285     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1286   }
1287   aij->getrowactive = PETSC_FALSE;
1288   PetscFunctionReturn(0);
1289 }
1290 
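/*
  MatNorm_MPIAIJ() computes, over the global matrix,
    NORM_FROBENIUS: sqrt( sum_ij |a_ij|^2 )   (local sums of squares, reduced with MPI_SUM)
    NORM_1:         max_j sum_i |a_ij|        (per-column sums reduced with MPI_SUM,
                                               then the maximum taken over all columns)
    NORM_INFINITY:  max_i sum_j |a_ij|        (local row sums, reduced with MPI_MAX)
  The two-norm is not supported.
*/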
1291 #undef __FUNCT__
1292 #define __FUNCT__ "MatNorm_MPIAIJ"
1293 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1294 {
1295   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1296   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1297   PetscErrorCode ierr;
1298   int          i,j,cstart = aij->cstart;
1299   PetscReal    sum = 0.0;
1300   PetscScalar  *v;
1301 
1302   PetscFunctionBegin;
1303   if (aij->size == 1) {
1304     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1305   } else {
1306     if (type == NORM_FROBENIUS) {
1307       v = amat->a;
1308       for (i=0; i<amat->nz; i++) {
1309 #if defined(PETSC_USE_COMPLEX)
1310         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1311 #else
1312         sum += (*v)*(*v); v++;
1313 #endif
1314       }
1315       v = bmat->a;
1316       for (i=0; i<bmat->nz; i++) {
1317 #if defined(PETSC_USE_COMPLEX)
1318         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1319 #else
1320         sum += (*v)*(*v); v++;
1321 #endif
1322       }
1323       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1324       *norm = sqrt(*norm);
1325     } else if (type == NORM_1) { /* max column norm */
1326       PetscReal *tmp,*tmp2;
1327       int    *jj,*garray = aij->garray;
1328       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1329       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1330       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1331       *norm = 0.0;
1332       v = amat->a; jj = amat->j;
1333       for (j=0; j<amat->nz; j++) {
1334         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1335       }
1336       v = bmat->a; jj = bmat->j;
1337       for (j=0; j<bmat->nz; j++) {
1338         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1339       }
1340       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1341       for (j=0; j<mat->N; j++) {
1342         if (tmp2[j] > *norm) *norm = tmp2[j];
1343       }
1344       ierr = PetscFree(tmp);CHKERRQ(ierr);
1345       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1346     } else if (type == NORM_INFINITY) { /* max row norm */
1347       PetscReal ntemp = 0.0;
1348       for (j=0; j<aij->A->m; j++) {
1349         v = amat->a + amat->i[j];
1350         sum = 0.0;
1351         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1352           sum += PetscAbsScalar(*v); v++;
1353         }
1354         v = bmat->a + bmat->i[j];
1355         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1356           sum += PetscAbsScalar(*v); v++;
1357         }
1358         if (sum > ntemp) ntemp = sum;
1359       }
1360       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1361     } else {
1362       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1363     }
1364   }
1365   PetscFunctionReturn(0);
1366 }
1367 
1368 #undef __FUNCT__
1369 #define __FUNCT__ "MatTranspose_MPIAIJ"
1370 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1371 {
1372   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1373   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1374   PetscErrorCode ierr;
1375   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1376   Mat          B;
1377   PetscScalar  *array;
1378 
1379   PetscFunctionBegin;
1380   if (!matout && M != N) {
1381     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1382   }
1383 
1384   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1385   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1386   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1387 
1388   /* copy over the A part */
1389   Aloc = (Mat_SeqAIJ*)a->A->data;
1390   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1391   row = a->rstart;
1392   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1393   for (i=0; i<m; i++) {
1394     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1395     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1396   }
1397   aj = Aloc->j;
1398   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1399 
1400   /* copy over the B part */
1401   Aloc = (Mat_SeqAIJ*)a->B->data;
1402   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1403   row  = a->rstart;
1404   ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
1405   ct   = cols;
1406   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1407   for (i=0; i<m; i++) {
1408     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1409     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1410   }
1411   ierr = PetscFree(ct);CHKERRQ(ierr);
1412   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1413   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1414   if (matout) {
1415     *matout = B;
1416   } else {
1417     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1418   }
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 #undef __FUNCT__
1423 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1424 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1425 {
1426   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1427   Mat        a = aij->A,b = aij->B;
1428   PetscErrorCode ierr;
1429   int        s1,s2,s3;
1430 
1431   PetscFunctionBegin;
1432   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1433   if (rr) {
1434     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1435     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1436     /* Overlap communication with computation. */
1437     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1438   }
1439   if (ll) {
1440     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1441     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1442     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1443   }
1444   /* scale  the diagonal block */
1445   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1446 
1447   if (rr) {
1448     /* Do a scatter end and then right scale the off-diagonal block */
1449     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1450     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1451   }
1452 
1453   PetscFunctionReturn(0);
1454 }
1455 
1456 
1457 #undef __FUNCT__
1458 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1459 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1460 {
1461   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1462   PetscErrorCode ierr;
1463 
1464   PetscFunctionBegin;
1465   if (!a->rank) {
1466     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1467   }
1468   PetscFunctionReturn(0);
1469 }
1470 
1471 #undef __FUNCT__
1472 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1473 PetscErrorCode MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1474 {
1475   PetscFunctionBegin;
1476   *bs = 1;
1477   PetscFunctionReturn(0);
1478 }
1479 #undef __FUNCT__
1480 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1481 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1482 {
1483   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1484   PetscErrorCode ierr;
1485 
1486   PetscFunctionBegin;
1487   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1488   PetscFunctionReturn(0);
1489 }
1490 
1491 #undef __FUNCT__
1492 #define __FUNCT__ "MatEqual_MPIAIJ"
1493 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1494 {
1495   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1496   Mat        a,b,c,d;
1497   PetscTruth flg;
1498   PetscErrorCode ierr;
1499 
1500   PetscFunctionBegin;
1501   a = matA->A; b = matA->B;
1502   c = matB->A; d = matB->B;
1503 
1504   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1505   if (flg == PETSC_TRUE) {
1506     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1507   }
1508   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1509   PetscFunctionReturn(0);
1510 }
1511 
1512 #undef __FUNCT__
1513 #define __FUNCT__ "MatCopy_MPIAIJ"
1514 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1515 {
1516   PetscErrorCode ierr;
1517   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1518   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1519 
1520   PetscFunctionBegin;
1521   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1522   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1523     /* Because of the column compression in the off-processor part of the matrix a->B,
1524        the number of columns of a->B and b->B may differ, hence we cannot call
1525        MatCopy() directly on the two parts. If need be, we could provide a more
1526        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
1527        and then copying the submatrices */
1528     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1529   } else {
1530     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1531     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1532   }
1533   PetscFunctionReturn(0);
1534 }
1535 
1536 #undef __FUNCT__
1537 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1538 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1539 {
1540   PetscErrorCode ierr;
1541 
1542   PetscFunctionBegin;
1543   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1544   PetscFunctionReturn(0);
1545 }
1546 
1547 #include "petscblaslapack.h"
1548 #undef __FUNCT__
1549 #define __FUNCT__ "MatAXPY_MPIAIJ"
1550 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1551 {
1552   PetscErrorCode ierr;
1553   int            i;
1554   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1555   PetscBLASInt   bnz,one=1;
1556   Mat_SeqAIJ     *x,*y;
1557 
1558   PetscFunctionBegin;
1559   if (str == SAME_NONZERO_PATTERN) {
1560     x = (Mat_SeqAIJ *)xx->A->data;
1561     y = (Mat_SeqAIJ *)yy->A->data;
1562     bnz = (PetscBLASInt)x->nz;
1563     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1564     x = (Mat_SeqAIJ *)xx->B->data;
1565     y = (Mat_SeqAIJ *)yy->B->data;
1566     bnz = (PetscBLASInt)x->nz;
1567     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1568   } else if (str == SUBSET_NONZERO_PATTERN) {
1569     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1570 
1571     x = (Mat_SeqAIJ *)xx->B->data;
1572     y = (Mat_SeqAIJ *)yy->B->data;
1573     if (y->xtoy && y->XtoY != xx->B) {
1574       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1575       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1576     }
1577     if (!y->xtoy) { /* get xtoy */
1578       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1579       y->XtoY = xx->B;
1580     }
1581     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1582   } else {
1583     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1584   }
1585   PetscFunctionReturn(0);
1586 }
1587 
1588 /* -------------------------------------------------------------------*/
1589 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1590        MatGetRow_MPIAIJ,
1591        MatRestoreRow_MPIAIJ,
1592        MatMult_MPIAIJ,
1593 /* 4*/ MatMultAdd_MPIAIJ,
1594        MatMultTranspose_MPIAIJ,
1595        MatMultTransposeAdd_MPIAIJ,
1596        0,
1597        0,
1598        0,
1599 /*10*/ 0,
1600        0,
1601        0,
1602        MatRelax_MPIAIJ,
1603        MatTranspose_MPIAIJ,
1604 /*15*/ MatGetInfo_MPIAIJ,
1605        MatEqual_MPIAIJ,
1606        MatGetDiagonal_MPIAIJ,
1607        MatDiagonalScale_MPIAIJ,
1608        MatNorm_MPIAIJ,
1609 /*20*/ MatAssemblyBegin_MPIAIJ,
1610        MatAssemblyEnd_MPIAIJ,
1611        0,
1612        MatSetOption_MPIAIJ,
1613        MatZeroEntries_MPIAIJ,
1614 /*25*/ MatZeroRows_MPIAIJ,
1615 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1616        MatLUFactorSymbolic_MPIAIJ_TFS,
1617 #else
1618        0,
1619 #endif
1620        0,
1621        0,
1622        0,
1623 /*30*/ MatSetUpPreallocation_MPIAIJ,
1624        0,
1625        0,
1626        0,
1627        0,
1628 /*35*/ MatDuplicate_MPIAIJ,
1629        0,
1630        0,
1631        0,
1632        0,
1633 /*40*/ MatAXPY_MPIAIJ,
1634        MatGetSubMatrices_MPIAIJ,
1635        MatIncreaseOverlap_MPIAIJ,
1636        MatGetValues_MPIAIJ,
1637        MatCopy_MPIAIJ,
1638 /*45*/ MatPrintHelp_MPIAIJ,
1639        MatScale_MPIAIJ,
1640        0,
1641        0,
1642        0,
1643 /*50*/ MatGetBlockSize_MPIAIJ,
1644        0,
1645        0,
1646        0,
1647        0,
1648 /*55*/ MatFDColoringCreate_MPIAIJ,
1649        0,
1650        MatSetUnfactored_MPIAIJ,
1651        0,
1652        0,
1653 /*60*/ MatGetSubMatrix_MPIAIJ,
1654        MatDestroy_MPIAIJ,
1655        MatView_MPIAIJ,
1656        MatGetPetscMaps_Petsc,
1657        0,
1658 /*65*/ 0,
1659        0,
1660        0,
1661        0,
1662        0,
1663 /*70*/ 0,
1664        0,
1665        MatSetColoring_MPIAIJ,
1666        MatSetValuesAdic_MPIAIJ,
1667        MatSetValuesAdifor_MPIAIJ,
1668 /*75*/ 0,
1669        0,
1670        0,
1671        0,
1672        0,
1673 /*80*/ 0,
1674        0,
1675        0,
1676        0,
1677 /*84*/ MatLoad_MPIAIJ,
1678        0,
1679        0,
1680        0,
1681        0,
1682 /*89*/ 0,
1683        MatMatMult_MPIAIJ_MPIAIJ,
1684        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1685        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1686        MatPtAP_MPIAIJ_MPIAIJ,
1687 /*94*/ MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1688        MatPtAPNumeric_MPIAIJ_MPIAIJ,
1689 };
1690 
1691 /* ----------------------------------------------------------------------------------------*/
1692 
1693 EXTERN_C_BEGIN
1694 #undef __FUNCT__
1695 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1696 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
1697 {
1698   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1699   PetscErrorCode ierr;
1700 
1701   PetscFunctionBegin;
1702   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1703   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1704   PetscFunctionReturn(0);
1705 }
1706 EXTERN_C_END
1707 
1708 EXTERN_C_BEGIN
1709 #undef __FUNCT__
1710 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1711 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
1712 {
1713   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1714   PetscErrorCode ierr;
1715 
1716   PetscFunctionBegin;
1717   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1718   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1719   PetscFunctionReturn(0);
1720 }
1721 EXTERN_C_END
1722 
1723 #include "petscpc.h"
1724 EXTERN_C_BEGIN
1725 #undef __FUNCT__
1726 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1727 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1728 {
1729   Mat_MPIAIJ   *b;
1730   PetscErrorCode ierr;
1731   int          i;
1732 
1733   PetscFunctionBegin;
1734   B->preallocated = PETSC_TRUE;
1735   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1736   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1737   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1738   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1739   if (d_nnz) {
1740     for (i=0; i<B->m; i++) {
1741       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1742     }
1743   }
1744   if (o_nnz) {
1745     for (i=0; i<B->m; i++) {
1746       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1747     }
1748   }
1749   b = (Mat_MPIAIJ*)B->data;
1750   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1751   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1752 
1753   PetscFunctionReturn(0);
1754 }
1755 EXTERN_C_END
1756 
1757 /*MC
1758    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
1759 
1760    Options Database Keys:
1761 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
1762 
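   A minimal usage sketch (hypothetical; the preallocation counts 5 and 2 are placeholders):

.vb
   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
   ierr = MatSetFromOptions(A);CHKERRQ(ierr);        /* picks up -mat_type mpiaij */
   ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve
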
1763   Level: beginner
1764 
1765 .seealso: MatCreateMPIAIJ
1766 M*/
1767 
1768 EXTERN_C_BEGIN
1769 #undef __FUNCT__
1770 #define __FUNCT__ "MatCreate_MPIAIJ"
1771 PetscErrorCode MatCreate_MPIAIJ(Mat B)
1772 {
1773   Mat_MPIAIJ *b;
1774   PetscErrorCode ierr;
1775   int        i,size;
1776 
1777   PetscFunctionBegin;
1778   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1779 
1780   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1781   B->data         = (void*)b;
1782   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1783   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1784   B->factor       = 0;
1785   B->assembled    = PETSC_FALSE;
1786   B->mapping      = 0;
1787 
1788   B->insertmode      = NOT_SET_VALUES;
1789   b->size            = size;
1790   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1791 
1792   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1793   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1794 
1795   /* the information in the maps duplicates the information computed below, eventually
1796      we should remove the duplicate information that is not contained in the maps */
1797   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1798   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1799 
1800   /* build local table of row and column ownerships */
1801   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1802   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1803   b->cowners = b->rowners + b->size + 2;
1804   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1805   b->rowners[0] = 0;
1806   for (i=2; i<=b->size; i++) {
1807     b->rowners[i] += b->rowners[i-1];
1808   }
1809   b->rstart = b->rowners[b->rank];
1810   b->rend   = b->rowners[b->rank+1];
1811   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1812   b->cowners[0] = 0;
1813   for (i=2; i<=b->size; i++) {
1814     b->cowners[i] += b->cowners[i-1];
1815   }
1816   b->cstart = b->cowners[b->rank];
1817   b->cend   = b->cowners[b->rank+1];
1818 
1819   /* build cache for off array entries formed */
1820   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1821   b->donotstash  = PETSC_FALSE;
1822   b->colmap      = 0;
1823   b->garray      = 0;
1824   b->roworiented = PETSC_TRUE;
1825 
1826   /* stuff used for matrix vector multiply */
1827   b->lvec      = PETSC_NULL;
1828   b->Mvctx     = PETSC_NULL;
1829 
1830   /* stuff for MatGetRow() */
1831   b->rowindices   = 0;
1832   b->rowvalues    = 0;
1833   b->getrowactive = PETSC_FALSE;
1834 
1835   /* Explicitly create 2 MATSEQAIJ matrices. */
1836   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
1837   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
1838   PetscLogObjectParent(B,b->A);
1839   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
1840   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
1841   PetscLogObjectParent(B,b->B);
1842 
1843   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1844                                      "MatStoreValues_MPIAIJ",
1845                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1846   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1847                                      "MatRetrieveValues_MPIAIJ",
1848                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1849   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1850 				     "MatGetDiagonalBlock_MPIAIJ",
1851                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1852   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
1853 				     "MatIsTranspose_MPIAIJ",
1854 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
1855   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
1856 				     "MatMPIAIJSetPreallocation_MPIAIJ",
1857 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
1858   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
1859 				     "MatDiagonalScaleLocal_MPIAIJ",
1860 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
1861   PetscFunctionReturn(0);
1862 }
1863 EXTERN_C_END
1864 
1865 /*MC
1866    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
1867 
1868    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
1869    and MATMPIAIJ otherwise.  As a result, for single process communicators,
1870   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
1871   for communicators controlling multiple processes.  It is recommended that you call both of
1872   the above preallocation routines for simplicity.
1873 
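   For example, a minimal sketch that works for both communicator sizes (the preallocation
   counts 5 and 2 are placeholders); only the preallocation call matching the actual type
   has an effect, the other is a no-op:

.vb
   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
   ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
   ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve
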
1874    Options Database Keys:
1875 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
1876 
1877   Level: beginner
1878 
1879 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ
1880 M*/
1881 
1882 EXTERN_C_BEGIN
1883 #undef __FUNCT__
1884 #define __FUNCT__ "MatCreate_AIJ"
1885 PetscErrorCode MatCreate_AIJ(Mat A)
1886 {
1887   PetscErrorCode ierr;
1888   int size;
1889 
1890   PetscFunctionBegin;
1891   ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr);
1892   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1893   if (size == 1) {
1894     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1895   } else {
1896     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1897   }
1898   PetscFunctionReturn(0);
1899 }
1900 EXTERN_C_END
1901 
1902 #undef __FUNCT__
1903 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1904 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1905 {
1906   Mat        mat;
1907   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1908   PetscErrorCode ierr;
1909 
1910   PetscFunctionBegin;
1911   *newmat       = 0;
1912   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1913   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1914   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1915   a    = (Mat_MPIAIJ*)mat->data;
1916 
1917   mat->factor       = matin->factor;
1918   mat->assembled    = PETSC_TRUE;
1919   mat->insertmode   = NOT_SET_VALUES;
1920   mat->preallocated = PETSC_TRUE;
1921 
1922   a->rstart       = oldmat->rstart;
1923   a->rend         = oldmat->rend;
1924   a->cstart       = oldmat->cstart;
1925   a->cend         = oldmat->cend;
1926   a->size         = oldmat->size;
1927   a->rank         = oldmat->rank;
1928   a->donotstash   = oldmat->donotstash;
1929   a->roworiented  = oldmat->roworiented;
1930   a->rowindices   = 0;
1931   a->rowvalues    = 0;
1932   a->getrowactive = PETSC_FALSE;
1933 
1934   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1935   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1936   if (oldmat->colmap) {
1937 #if defined (PETSC_USE_CTABLE)
1938     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1939 #else
1940     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1941     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1942     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1943 #endif
1944   } else a->colmap = 0;
1945   if (oldmat->garray) {
1946     int len;
1947     len  = oldmat->B->n;
1948     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1949     PetscLogObjectMemory(mat,len*sizeof(int));
1950     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1951   } else a->garray = 0;
1952 
1953   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1954   PetscLogObjectParent(mat,a->lvec);
1955   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1956   PetscLogObjectParent(mat,a->Mvctx);
1957   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1958   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1959   PetscLogObjectParent(mat,a->A);
1960   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1961   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1962   PetscLogObjectParent(mat,a->B);
1963   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1964   *newmat = mat;
1965   PetscFunctionReturn(0);
1966 }
1967 
1968 #include "petscsys.h"
1969 
1970 #undef __FUNCT__
1971 #define __FUNCT__ "MatLoad_MPIAIJ"
1972 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1973 {
1974   Mat          A;
1975   PetscScalar  *vals,*svals;
1976   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1977   MPI_Status   status;
1978   PetscErrorCode ierr;
1979   int          i,nz,j,rstart,rend,fd;
1980   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1981   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1982   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1983 
1984   PetscFunctionBegin;
1985   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1986   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1987   if (!rank) {
1988     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1989     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1990     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1991     if (header[3] < 0) {
1992       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1993     }
1994   }
1995 
1996   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1997   M = header[1]; N = header[2];
1998   /* determine ownership of all rows */
1999   m = M/size + ((M % size) > rank);
2000   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2001   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2002   rowners[0] = 0;
2003   for (i=2; i<=size; i++) {
2004     rowners[i] += rowners[i-1];
2005   }
2006   rstart = rowners[rank];
2007   rend   = rowners[rank+1];
2008 
2009   /* distribute row lengths to all processors */
2010   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
2011   offlens = ourlens + (rend-rstart);
2012   if (!rank) {
2013     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
2014     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2015     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2016     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
2017     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
2018     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2019   } else {
2020     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
2021   }
2022 
2023   if (!rank) {
2024     /* calculate the number of nonzeros on each processor */
2025     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2026     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2027     for (i=0; i<size; i++) {
2028       for (j=rowners[i]; j< rowners[i+1]; j++) {
2029         procsnz[i] += rowlengths[j];
2030       }
2031     }
2032     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2033 
2034     /* determine max buffer needed and allocate it */
2035     maxnz = 0;
2036     for (i=0; i<size; i++) {
2037       maxnz = PetscMax(maxnz,procsnz[i]);
2038     }
2039     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2040 
2041     /* read in my part of the matrix column indices  */
2042     nz   = procsnz[0];
2043     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
2044     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2045 
2046     /* read in everyone else's and ship off */
2047     for (i=1; i<size; i++) {
2048       nz   = procsnz[i];
2049       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2050       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2051     }
2052     ierr = PetscFree(cols);CHKERRQ(ierr);
2053   } else {
2054     /* determine buffer space needed for message */
2055     nz = 0;
2056     for (i=0; i<m; i++) {
2057       nz += ourlens[i];
2058     }
2059     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
2060 
2061     /* receive message of column indices*/
2062     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2063     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2064     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2065   }
2066 
2067   /* determine column ownership if matrix is not square */
2068   if (N != M) {
2069     n      = N/size + ((N % size) > rank);
2070     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2071     cstart = cend - n;
2072   } else {
2073     cstart = rstart;
2074     cend   = rend;
2075     n      = cend - cstart;
2076   }
2077 
2078   /* loop over local rows, determining number of off diagonal entries */
2079   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2080   jj = 0;
2081   for (i=0; i<m; i++) {
2082     for (j=0; j<ourlens[i]; j++) {
2083       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2084       jj++;
2085     }
2086   }
2087 
2088   /* create our matrix */
2089   for (i=0; i<m; i++) {
2090     ourlens[i] -= offlens[i];
2091   }
2092   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2093   ierr = MatSetType(A,type);CHKERRQ(ierr);
2094   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2095 
2096   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2097   for (i=0; i<m; i++) {
2098     ourlens[i] += offlens[i];
2099   }
2100 
2101   if (!rank) {
2102     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2103 
2104     /* read in my part of the matrix numerical values  */
2105     nz   = procsnz[0];
2106     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2107 
2108     /* insert into matrix */
2109     jj      = rstart;
2110     smycols = mycols;
2111     svals   = vals;
2112     for (i=0; i<m; i++) {
2113       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2114       smycols += ourlens[i];
2115       svals   += ourlens[i];
2116       jj++;
2117     }
2118 
2119     /* read in other processors and ship out */
2120     for (i=1; i<size; i++) {
2121       nz   = procsnz[i];
2122       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2123       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2124     }
2125     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2126   } else {
2127     /* receive numeric values */
2128     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2129 
2130     /* receive message of values*/
2131     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2132     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2133     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2134 
2135     /* insert into matrix */
2136     jj      = rstart;
2137     smycols = mycols;
2138     svals   = vals;
2139     for (i=0; i<m; i++) {
2140       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2141       smycols += ourlens[i];
2142       svals   += ourlens[i];
2143       jj++;
2144     }
2145   }
2146   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2147   ierr = PetscFree(vals);CHKERRQ(ierr);
2148   ierr = PetscFree(mycols);CHKERRQ(ierr);
2149   ierr = PetscFree(rowners);CHKERRQ(ierr);
2150 
2151   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2152   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2153   *newmat = A;
2154   PetscFunctionReturn(0);
2155 }
2156 
2157 #undef __FUNCT__
2158 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2159 /*
2160     Not great since it makes two copies of the submatrix: first a SeqAIJ copy on each
2161   process, and then the end result is formed by concatenating the local matrices.
2162   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2163 */
2164 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2165 {
2166   PetscErrorCode ierr;
2167   int          i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2168   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2169   Mat          *local,M,Mreuse;
2170   PetscScalar  *vwork,*aa;
2171   MPI_Comm     comm = mat->comm;
2172   Mat_SeqAIJ   *aij;
2173 
2174 
2175   PetscFunctionBegin;
2176   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2177   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2178 
2179   if (call ==  MAT_REUSE_MATRIX) {
2180     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2181     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2182     local = &Mreuse;
2183     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2184   } else {
2185     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2186     Mreuse = *local;
2187     ierr   = PetscFree(local);CHKERRQ(ierr);
2188   }
2189 
2190   /*
2191       m - number of local rows
2192       n - number of columns (same on all processors)
2193       rstart - first row in new global matrix generated
2194   */
2195   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2196   if (call == MAT_INITIAL_MATRIX) {
2197     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2198     ii  = aij->i;
2199     jj  = aij->j;
2200 
2201     /*
2202         Determine the number of non-zeros in the diagonal and off-diagonal
2203         portions of the matrix in order to do correct preallocation
2204     */
2205 
2206     /* first get start and end of "diagonal" columns */
2207     if (csize == PETSC_DECIDE) {
2208       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2209       if (mglobal == n) { /* square matrix */
2210 	nlocal = m;
2211       } else {
2212         nlocal = n/size + ((n % size) > rank);
2213       }
2214     } else {
2215       nlocal = csize;
2216     }
2217     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2218     rstart = rend - nlocal;
2219     if (rank == size - 1 && rend != n) {
2220       SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
2221     }
2222 
2223     /* next, compute all the lengths */
2224     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2225     olens = dlens + m;
2226     for (i=0; i<m; i++) {
2227       jend = ii[i+1] - ii[i];
2228       olen = 0;
2229       dlen = 0;
2230       for (j=0; j<jend; j++) {
2231         if (*jj < rstart || *jj >= rend) olen++;
2232         else dlen++;
2233         jj++;
2234       }
2235       olens[i] = olen;
2236       dlens[i] = dlen;
2237     }
2238     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2239     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2240     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2241     ierr = PetscFree(dlens);CHKERRQ(ierr);
2242   } else {
2243     int ml,nl;
2244 
2245     M = *newmat;
2246     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2247     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2248     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2249     /*
2250          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2251        rather than the slower MatSetValues().
2252     */
2253     M->was_assembled = PETSC_TRUE;
2254     M->assembled     = PETSC_FALSE;
2255   }
2256   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2257   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2258   ii  = aij->i;
2259   jj  = aij->j;
2260   aa  = aij->a;
2261   for (i=0; i<m; i++) {
2262     row   = rstart + i;
2263     nz    = ii[i+1] - ii[i];
2264     cwork = jj;     jj += nz;
2265     vwork = aa;     aa += nz;
2266     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2267   }
2268 
2269   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2270   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2271   *newmat = M;
2272 
2273   /* save submatrix used in processor for next request */
2274   if (call ==  MAT_INITIAL_MATRIX) {
2275     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2276     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2277   }
2278 
2279   PetscFunctionReturn(0);
2280 }
2281 
2282 #undef __FUNCT__
2283 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2284 /*@C
2285    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2286    (the default parallel PETSc format).  For good matrix assembly performance
2287    the user should preallocate the matrix storage by setting the parameters
2288    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2289    performance can be increased by more than a factor of 50.
2290 
2291    Collective on MPI_Comm
2292 
2293    Input Parameters:
2294 +  A - the matrix
2295 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2296            (same value is used for all local rows)
2297 .  d_nnz - array containing the number of nonzeros in the various rows of the
2298            DIAGONAL portion of the local submatrix (possibly different for each row)
2299            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2300            The size of this array is equal to the number of local rows, i.e 'm'.
2301            You must leave room for the diagonal entry even if it is zero.
2302 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2303            submatrix (same value is used for all local rows).
2304 -  o_nnz - array containing the number of nonzeros in the various rows of the
2305            OFF-DIAGONAL portion of the local submatrix (possibly different for
2306            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2307            structure. The size of this array is equal to the number
2308            of local rows, i.e 'm'.
2309 
2310    The AIJ format (also called the Yale sparse matrix format or
2311    compressed row storage) is fully compatible with standard Fortran 77
2312    storage.  That is, the stored row and column indices can begin at
2313    either one (as in Fortran) or zero.  See the users manual for details.
2314 
2315    The user MUST specify either the local or global matrix dimensions
2316    (possibly both).
2317 
2318    The parallel matrix is partitioned such that the first m0 rows belong to
2319    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2320    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2321 
2322    The DIAGONAL portion of the local submatrix of a processor can be defined
2323    as the submatrix obtained by extracting the part corresponding
2324    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2325    first row that belongs to the processor, and r2 is the last row belonging
2326    to this processor. This is a square mxm matrix. The remaining portion
2327    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2328 
2329    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2330 
2331    By default, this format uses inodes (identical nodes) when possible.
2332    We search for consecutive rows with the same nonzero structure, thereby
2333    reusing matrix information to achieve increased efficiency.
2334 
2335    Options Database Keys:
2336 +  -mat_aij_no_inode  - Do not use inodes
2337 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2338 -  -mat_aij_oneindex - Internally use indexing starting at 1
2339         rather than 0.  Note that when calling MatSetValues(),
2340         the user still MUST index entries starting at 0!
2341 
2342    Example usage:
2343 
2344    Consider the following 8x8 matrix with 34 non-zero values that is
2345    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2346    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2347    as follows:
2348 
2349 .vb
2350             1  2  0  |  0  3  0  |  0  4
2351     Proc0   0  5  6  |  7  0  0  |  8  0
2352             9  0 10  | 11  0  0  | 12  0
2353     -------------------------------------
2354            13  0 14  | 15 16 17  |  0  0
2355     Proc1   0 18  0  | 19 20 21  |  0  0
2356             0  0  0  | 22 23  0  | 24  0
2357     -------------------------------------
2358     Proc2  25 26 27  |  0  0 28  | 29  0
2359            30  0  0  | 31 32 33  |  0 34
2360 .ve
2361 
2362    This can be represented as a collection of submatrices as:
2363 
2364 .vb
2365       A B C
2366       D E F
2367       G H I
2368 .ve
2369 
2370    Where the submatrices A,B,C are owned by proc0, D,E,F are
2371    owned by proc1, G,H,I are owned by proc2.
2372 
2373    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2374    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2375    The 'M','N' parameters are 8,8, and have the same values on all procs.
2376 
2377    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2378    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2379    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2380    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2381    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2382    matrix, and [DF] as another SeqAIJ matrix.
2383 
2384    When d_nz, o_nz parameters are specified, d_nz storage elements are
2385    allocated for every row of the local diagonal submatrix, and o_nz
2386    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2387    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
2388    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2389    In this case, the values of d_nz,o_nz are:
2390 .vb
2391      proc0 : dnz = 2, o_nz = 2
2392      proc1 : dnz = 3, o_nz = 2
2393      proc2 : dnz = 1, o_nz = 4
2394 .ve
2395    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2396    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2397    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2398    34 values.
2399 
2400    When d_nnz, o_nnz parameters are specified, the storage is specified
2401    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2402    In the above case the values for d_nnz,o_nnz are:
2403 .vb
2404      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2405      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2406      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2407 .ve
2408    Here the space allocated is the sum of all the above values, i.e. 34, and
2409    hence the preallocation is perfect.
2410 
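   For instance, proc1 could preallocate its local part with the per-row counts above as
   follows (a sketch only; A is assumed to have already been created with type MATMPIAIJ):

.vb
   int d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};   /* proc1 values from the example above */
   ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
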
2411    Level: intermediate
2412 
2413 .keywords: matrix, aij, compressed row, sparse, parallel
2414 
2415 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2416 @*/
2417 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2418 {
2419   PetscErrorCode ierr,(*f)(Mat,int,const int[],int,const int[]);
2420 
2421   PetscFunctionBegin;
2422   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2423   if (f) {
2424     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2425   }
2426   PetscFunctionReturn(0);
2427 }
2428 
2429 #undef __FUNCT__
2430 #define __FUNCT__ "MatCreateMPIAIJ"
2431 /*@C
2432    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2433    (the default parallel PETSc format).  For good matrix assembly performance
2434    the user should preallocate the matrix storage by setting the parameters
2435    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2436    performance can be increased by more than a factor of 50.
2437 
2438    Collective on MPI_Comm
2439 
2440    Input Parameters:
2441 +  comm - MPI communicator
2442 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2443            This value should be the same as the local size used in creating the
2444            y vector for the matrix-vector product y = Ax.
2445 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2446        This value should be the same as the local size used in creating the
2447        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
2448 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2449 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2450 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2451            (same value is used for all local rows)
2452 .  d_nnz - array containing the number of nonzeros in the various rows of the
2453            DIAGONAL portion of the local submatrix (possibly different for each row)
2454            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2455            The size of this array is equal to the number of local rows, i.e 'm'.
2456            You must leave room for the diagonal entry even if it is zero.
2457 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2458            submatrix (same value is used for all local rows).
2459 -  o_nnz - array containing the number of nonzeros in the various rows of the
2460            OFF-DIAGONAL portion of the local submatrix (possibly different for
2461            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2462            structure. The size of this array is equal to the number
2463            of local rows, i.e 'm'.
2464 
2465    Output Parameter:
2466 .  A - the matrix
2467 
2468    Notes:
2469    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2470    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2471    storage requirements for this matrix.
2472 
2473    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2474    processor then it must be used on all processors that share the object for
2475    that argument.
2476 
2477    The AIJ format (also called the Yale sparse matrix format or
2478    compressed row storage) is fully compatible with standard Fortran 77
2479    storage.  That is, the stored row and column indices can begin at
2480    either one (as in Fortran) or zero.  See the users manual for details.
2481 
2482    The user MUST specify either the local or global matrix dimensions
2483    (possibly both).
2484 
2485    The parallel matrix is partitioned such that the first m0 rows belong to
2486    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2487    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2488 
2489    The DIAGONAL portion of the local submatrix of a processor can be defined
2490    as the submatrix obtained by extracting the part corresponding
2491    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2492    first row that belongs to the processor, and r2 is the last row belonging
2493    to this processor. This is a square mxm matrix. The remaining portion
2494    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2495 
2496    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2497 
2498    When calling this routine with a single process communicator, a matrix of
2499    type MATSEQAIJ is returned.  If a matrix of type MATMPIAIJ is desired for this
2500    type of communicator, use the construction mechanism:
2501      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2502 
2503    By default, this format uses inodes (identical nodes) when possible.
2504    We search for consecutive rows with the same nonzero structure, thereby
2505    reusing matrix information to achieve increased efficiency.
2506 
2507    Options Database Keys:
2508 +  -mat_aij_no_inode  - Do not use inodes
2509 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2510 -  -mat_aij_oneindex - Internally use indexing starting at 1
2511         rather than 0.  Note that when calling MatSetValues(),
2512         the user still MUST index entries starting at 0!
2513 
2514 
2515    Example usage:
2516 
2517    Consider the following 8x8 matrix with 34 non-zero values that is
2518    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2519    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2520    as follows:
2521 
2522 .vb
2523             1  2  0  |  0  3  0  |  0  4
2524     Proc0   0  5  6  |  7  0  0  |  8  0
2525             9  0 10  | 11  0  0  | 12  0
2526     -------------------------------------
2527            13  0 14  | 15 16 17  |  0  0
2528     Proc1   0 18  0  | 19 20 21  |  0  0
2529             0  0  0  | 22 23  0  | 24  0
2530     -------------------------------------
2531     Proc2  25 26 27  |  0  0 28  | 29  0
2532            30  0  0  | 31 32 33  |  0 34
2533 .ve
2534 
2535    This can be represented as a collection of submatrices as:
2536 
2537 .vb
2538       A B C
2539       D E F
2540       G H I
2541 .ve
2542 
2543    Where the submatrices A,B,C are owned by proc0, D,E,F are
2544    owned by proc1, G,H,I are owned by proc2.
2545 
2546    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2547    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2548    The 'M','N' parameters are 8,8, and have the same values on all procs.
2549 
2550    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2551    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2552    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2553    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2554    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2555    matrix, and [DF] as another SeqAIJ matrix.
2556 
2557    When d_nz, o_nz parameters are specified, d_nz storage elements are
2558    allocated for every row of the local diagonal submatrix, and o_nz
2559    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2560    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
2561    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2562    In this case, the values of d_nz,o_nz are:
2563 .vb
2564      proc0 : dnz = 2, o_nz = 2
2565      proc1 : dnz = 3, o_nz = 2
2566      proc2 : dnz = 1, o_nz = 4
2567 .ve
2568    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2569    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2570    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2571    34 values.
2572 
2573    When d_nnz, o_nnz parameters are specified, the storage is specified
2574    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2575    In the above case the values for d_nnz,o_nnz are:
2576 .vb
2577      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2578      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2579      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2580 .ve
2581    Here the space allocated is the sum of all the above values, i.e. 34, and
2582    hence the preallocation is perfect.
2583 
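   For instance, proc1 in the example above could create its share of the matrix as
   follows (a sketch only, using the per-row counts listed above):

.vb
   int d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};   /* proc1 values from the example above */
   ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
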
2584    Level: intermediate
2585 
2586 .keywords: matrix, aij, compressed row, sparse, parallel
2587 
2588 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2589 @*/
2590 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2591 {
2592   PetscErrorCode ierr;
2593   int size;
2594 
2595   PetscFunctionBegin;
2596   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2597   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2598   if (size > 1) {
2599     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2600     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2601   } else {
2602     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2603     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2604   }
2605   PetscFunctionReturn(0);
2606 }
2607 
2608 #undef __FUNCT__
2609 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2610 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2611 {
2612   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2613   PetscFunctionBegin;
2614   *Ad     = a->A;
2615   *Ao     = a->B;
2616   *colmap = a->garray;
2617   PetscFunctionReturn(0);
2618 }
2619 
2620 #undef __FUNCT__
2621 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2622 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2623 {
2624   PetscErrorCode ierr;
2625   int        i;
2626   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2627 
2628   PetscFunctionBegin;
2629   if (coloring->ctype == IS_COLORING_LOCAL) {
2630     ISColoringValue *allcolors,*colors;
2631     ISColoring      ocoloring;
2632 
2633     /* set coloring for diagonal portion */
2634     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2635 
2636     /* set coloring for off-diagonal portion */
2637     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2638     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2639     for (i=0; i<a->B->n; i++) {
2640       colors[i] = allcolors[a->garray[i]];
2641     }
2642     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2643     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2644     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2645     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2646   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2647     ISColoringValue *colors;
2648     int             *larray;
2649     ISColoring      ocoloring;
2650 
2651     /* set coloring for diagonal portion */
2652     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2653     for (i=0; i<a->A->n; i++) {
2654       larray[i] = i + a->cstart;
2655     }
2656     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2657     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2658     for (i=0; i<a->A->n; i++) {
2659       colors[i] = coloring->colors[larray[i]];
2660     }
2661     ierr = PetscFree(larray);CHKERRQ(ierr);
2662     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2663     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2664     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2665 
2666     /* set coloring for off-diagonal portion */
2667     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2668     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2669     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2670     for (i=0; i<a->B->n; i++) {
2671       colors[i] = coloring->colors[larray[i]];
2672     }
2673     ierr = PetscFree(larray);CHKERRQ(ierr);
2674     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2675     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2676     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2677   } else {
2678     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2679   }
2680 
2681   PetscFunctionReturn(0);
2682 }
2683 
2684 #undef __FUNCT__
2685 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2686 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2687 {
2688   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2689   PetscErrorCode ierr;
2690 
2691   PetscFunctionBegin;
2692   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2693   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2694   PetscFunctionReturn(0);
2695 }
2696 
2697 #undef __FUNCT__
2698 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2699 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2700 {
2701   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2702   PetscErrorCode ierr;
2703 
2704   PetscFunctionBegin;
2705   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2706   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2707   PetscFunctionReturn(0);
2708 }
2709 
2710 #undef __FUNCT__
2711 #define __FUNCT__ "MatMerge"
2712 /*@C
2713       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2714                  matrices from each processor
2715 
2716     Collective on MPI_Comm
2717 
2718    Input Parameters:
2719 +    comm - the communicator the parallel matrix will live on
2720 .    inmat - the input sequential matrix on each processor
2721 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2722 
2723    Output Parameter:
2724 .    outmat - the parallel matrix generated
2725 
2726     Level: advanced
2727 
2728    Notes: The number of columns of the matrix on EACH processor MUST be the same.
2729 
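   A usage sketch (A_seq is a fully assembled sequential matrix on each process; the name is
   illustrative). Note that the current implementation destroys the input matrix:

.vb
   Mat A_mpi;
   ierr = MatMerge(PETSC_COMM_WORLD,A_seq,MAT_INITIAL_MATRIX,&A_mpi);CHKERRQ(ierr);
.ve
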
2730 @*/
2731 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,MatReuse scall,Mat *outmat)
2732 {
2733   PetscErrorCode ierr;
2734   int               m,n,i,rstart,nnz,I,*dnz,*onz;
2735   const int         *indx;
2736   const PetscScalar *values;
2737   PetscMap          columnmap,rowmap;
2738 
2739   PetscFunctionBegin;
2740 
2741   ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);
2742   if (scall == MAT_INITIAL_MATRIX){
2743     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2744     ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2745     ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
2746     ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2747     ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2748     ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2749 
2750     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2751     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2752     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2753     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2754     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2755 
2756     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2757     for (i=0;i<m;i++) {
2758       ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2759       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2760       ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2761     }
2762     /* This routine will ONLY return an MPIAIJ type matrix */
2763     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2764     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2765     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2766     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2767 
2768   } else if (scall == MAT_REUSE_MATRIX){
2769     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2770   } else {
2771     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
2772   }
2773 
2774   for (i=0;i<m;i++) {
2775     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2776     I    = i + rstart;
2777     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2778     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2779   }
2780   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2781   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2782   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2783 
2784   PetscFunctionReturn(0);
2785 }
2786 
2787 #undef __FUNCT__
2788 #define __FUNCT__ "MatFileSplit"
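/*
   MatFileSplit - Writes the locally owned rows of the parallel matrix A, as a
   sequential AIJ matrix, to the binary file <outfile>.<rank>; one file per process.
*/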
2789 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2790 {
2791   PetscErrorCode ierr;
2792   int               rank,m,N,i,rstart,nnz;
2793   size_t            len;
2794   const int         *indx;
2795   PetscViewer       out;
2796   char              *name;
2797   Mat               B;
2798   const PetscScalar *values;
2799 
2800   PetscFunctionBegin;
2801 
2802   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2803   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2804   /* Should this be the type of the diagonal block of A? */
2805   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2806   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2807   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2808   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2809   for (i=0;i<m;i++) {
2810     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2811     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2812     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2813   }
2814   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2815   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2816 
2817   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2818   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2819   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2820   sprintf(name,"%s.%d",outfile,rank);
2821   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2822   ierr = PetscFree(name);CHKERRQ(ierr);
2823   ierr = MatView(B,out);CHKERRQ(ierr);
2824   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2825   ierr = MatDestroy(B);CHKERRQ(ierr);
2826   PetscFunctionReturn(0);
2827 }
2828 
2829 typedef struct { /* used by MatMerge_SeqsToMPI for reusing the merged matrix */
2830   PetscMap  rowmap;
2831   int       nrecv,nsend,*id_r,*bi,*bj,**ijbuf_r;
2832   int       *len_sra; /* array of length 2*size: len_sra[proc] = num of values sent to [proc],
2833                          len_sra[size+i] = num of values in the i-th received message */
2834   MatScalar *abuf_s;
2835 } Mat_Merge_SeqsToMPI;
2836 
2837 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2838 #undef __FUNCT__
2839 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2840 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2841 {
2842   PetscErrorCode ierr;
2843   Mat_Merge_SeqsToMPI *merge=(Mat_Merge_SeqsToMPI*)A->spptr;
2844 
2845   PetscFunctionBegin;
2846   ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2847   ierr = PetscFree(merge->abuf_s);CHKERRQ(ierr);
2848   ierr = PetscFree(merge->len_sra);CHKERRQ(ierr);
2849   ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2850   ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2851   ierr = PetscFree(merge->ijbuf_r);CHKERRQ(ierr);
2852   ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2853   ierr = PetscFree(merge);CHKERRQ(ierr);
2854 
2855   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2856   PetscFunctionReturn(0);
2857 }
2858 
2859 #include "src/mat/utils/freespace.h"
2860 #undef __FUNCT__
2861 #define __FUNCT__ "MatMerge_SeqsToMPI"
2862 /*@C
2863       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
2864                  matrices from each processor
2865 
2866     Collective on MPI_Comm
2867 
2868    Input Parameters:
2869 +    comm - the communicator the parallel matrix will live on
2870 .    seqmat - the input sequential matrix on each process
2871 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2872 
2873    Output Parameter:
2874 .    mpimat - the parallel matrix generated
2875 
2876     Level: advanced
2877 
2878    Notes: The dimensions of the sequential matrix on each process
2879       MUST be the same.
2880 
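   Example usage (a minimal sketch; seqA and mpiA are illustrative names, and every
   process is assumed to have assembled its own M-by-N sequential contribution seqA):
.vb
      Mat seqA,mpiA;
      /* ... build and assemble the per-process sequential matrix seqA ... */
      ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqA,MAT_INITIAL_MATRIX,&mpiA);CHKERRQ(ierr);
      /* values contributed by several processes to the same location are added together;
         pass MAT_REUSE_MATRIX with the same mpiA to redo the merge with new values */
.ve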
2881 @*/
2882 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,MatReuse scall,Mat *mpimat)
2883 {
2884   PetscErrorCode    ierr;
2885   Mat               B_seq,B_mpi;
2886   Mat_SeqAIJ        *a=(Mat_SeqAIJ*)seqmat->data;
2887   int               size,rank,M=seqmat->m,N=seqmat->n,m,i,j,*owners,
2888                     *ai=a->i,*aj=a->j,tag,taga,len,len_a,*len_s,*len_sa,proc,*len_r,*len_ra,
2889                     **ijbuf_r,*ijbuf_s,*nnz_ptr,k,anzi,*bj_i,
2890                     *bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0,nextaj;
2891   MPI_Request       *s_waits,*r_waits,*s_waitsa,*r_waitsa;
2892   MPI_Status        *status;
2893   MatScalar         *ba,*aa=a->a,**abuf_r,*abuf_i,*ba_i;
2894   FreeSpaceList     free_space=PETSC_NULL,current_space=PETSC_NULL;
2895   Mat_Merge_SeqsToMPI *merge;
2896 
2897   PetscFunctionBegin;
2898   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2899   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2900   if (scall == MAT_INITIAL_MATRIX){
2901     ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
2902   } else if (scall == MAT_REUSE_MATRIX){
2903     B_mpi = *mpimat;
2904     merge = (Mat_Merge_SeqsToMPI*)B_mpi->spptr;
2905     bi      = merge->bi;
2906     bj      = merge->bj;
2907     ijbuf_r = merge->ijbuf_r;
2908   } else {
2909     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
2910   }
2911   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2912 
2913   /* determine the number of messages to send, their lengths */
2914   /*---------------------------------------------------------*/
2915   if (scall == MAT_INITIAL_MATRIX){
2916     ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
2917     ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
2918     ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
2919     ierr = PetscMalloc(size*sizeof(int),&len_s);CHKERRQ(ierr);
2920     ierr = PetscMalloc(2*size*sizeof(int),&merge->len_sra);CHKERRQ(ierr);
2921   }
2922   ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
2923   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2924 
2925   len_sa  = merge->len_sra;
2926   len_ra = merge->len_sra + size;
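  /* len_sa[proc] counts the matrix values this process sends to [proc];
     len_ra[i] will hold the number of values carried by the i-th received message */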
2927 
2928   if (scall == MAT_INITIAL_MATRIX){
2929     merge->nsend = 0;
2930     len = len_a = 0;
2931     for (proc=0; proc<size; proc++){
2932       if (proc == rank) continue;
2933       len_s[proc] = len_sa[proc] = 0;
2934       for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */
2935         len_sa[proc] += ai[i+1] - ai[i]; /* num of cols sent to [proc] */
2936       }
2937       if (len_sa[proc]) {
2938         merge->nsend++;
2939         len_s[proc] = len_sa[proc] + owners[proc+1] - owners[proc];
2940         if (len < len_s[proc]) len = len_s[proc];
2941         if (len_a < len_sa[proc]) len_a = len_sa[proc];
2942       }
2943     }
2944 
2945     /* determine the number and length of messages to receive */
2946     /*--------------------------------------------------------*/
2947     ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
2948     ierr = PetscGatherMessageLengths(comm,merge->nsend,merge->nrecv,len_s,&merge->id_r,&len_r);CHKERRQ(ierr);
2949 
2950     /* post the Irecvs corresponding to these messages */
2951     /*-------------------------------------------------*/
2952     /* get new tag to keep the communication clean */
2953     ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tag);CHKERRQ(ierr);
2954     ierr = PetscPostIrecvInt(comm,tag,merge->nrecv,merge->id_r,len_r,&ijbuf_r,&r_waits);CHKERRQ(ierr);
2955 
2956     /* post the sends */
2957     /*----------------*/
2958     ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2959     ierr = PetscMalloc((len+1)*sizeof(int),&ijbuf_s);CHKERRQ(ierr);
2960     k = 0;
2961     for (proc=0; proc<size; proc++){
2962       if (!len_s[proc]) continue;
2963       /* form outgoing ij data to be sent to [proc] */
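      /* message layout: the first (owners[proc+1]-owners[proc]) entries hold the running
         (cumulative) nonzero count per row, followed by the column indices of all rows */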
2964       nnz_ptr = ijbuf_s + owners[proc+1] - owners[proc];
2965       for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */
2966         anzi = ai[i+1] - ai[i];
2967         if (i==owners[proc]){
2968           ijbuf_s[i-owners[proc]] = anzi;
2969         } else {
2970           ijbuf_s[i-owners[proc]] = ijbuf_s[i-owners[proc]-1]+ anzi;
2971         }
2972         for (j=0; j <anzi; j++){
2973           *nnz_ptr = *(aj+ai[i]+j); nnz_ptr++;
2974         }
2975       }
2976       ierr = MPI_Isend(ijbuf_s,len_s[proc],MPI_INT,proc,tag,comm,s_waits+k);CHKERRQ(ierr);
2977       k++;
2978     }
2979 
2980     /* receives and sends of ij-structure are complete, deallocate memories */
2981     /*----------------------------------------------------------------------*/
2982     ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
2983     ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
2984 
2985     ierr = PetscFree(ijbuf_s);CHKERRQ(ierr);
2986     ierr = PetscFree(s_waits);CHKERRQ(ierr);
2987     ierr = PetscFree(r_waits);CHKERRQ(ierr);
2988   }
2989 
2990   /* send and recv matrix values */
2991   /*-----------------------------*/
2992   if (scall == MAT_INITIAL_MATRIX){
2993     ierr = PetscMalloc((len_a+1)*sizeof(MatScalar),&merge->abuf_s);CHKERRQ(ierr);
2994     for (i=0; i<merge->nrecv; i++) len_ra[i] = len_r[i]-m; /* length of seqmat->a to be received from a proc */
2995   }
2996   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
2997   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,len_ra,&abuf_r,&r_waitsa);CHKERRQ(ierr);
2998 
2999   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waitsa);CHKERRQ(ierr);
3000   k = 0;
3001   for (proc=0; proc<size; proc++){
3002     if (!len_sa[proc]) continue;
3003     /* form outgoing mat value data to be sent to [proc] */
3004     abuf_i = merge->abuf_s;
3005     for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */
3006       anzi = ai[i+1] - ai[i];
3007       for (j=0; j <anzi; j++){
3008         *abuf_i = *(aa+ai[i]+j); abuf_i++;
3009       }
3010     }
3011     ierr = MPI_Isend(merge->abuf_s,len_sa[proc],MPIU_MATSCALAR,proc,taga,comm,s_waitsa+k);CHKERRQ(ierr);
3012     k++;
3013   }
3014 
3015   ierr = MPI_Waitall(merge->nrecv,r_waitsa,status);CHKERRQ(ierr);
3016   ierr = MPI_Waitall(merge->nsend,s_waitsa,status);CHKERRQ(ierr);
3017   ierr = PetscFree(status);CHKERRQ(ierr);
3018 
3019   ierr = PetscFree(s_waitsa);CHKERRQ(ierr);
3020   ierr = PetscFree(r_waitsa);CHKERRQ(ierr);
3021 
3022   /* create seq matrix B_seq in each processor */
3023   /*-------------------------------------------*/
3024   if (scall == MAT_INITIAL_MATRIX){
3025     ierr = PetscFree(len_s);CHKERRQ(ierr);
3026     /* allocate bi array and free space for accumulating nonzero column info */
3027     ierr = PetscMalloc((m+1)*sizeof(int),&bi);CHKERRQ(ierr);
3028     bi[0] = 0;
3029 
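    /* lnk is workspace for the PetscLL linked-list routines, used to accumulate the
       sorted, duplicate-free column indices of each row of B_seq */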
3030     ierr = PetscMalloc((N+1)*sizeof(int),&lnk);CHKERRQ(ierr);
3031     ierr = PetscLLInitialize(N,N);CHKERRQ(ierr);
3032 
3033     /* initial FreeSpace size is (nproc)*(num of local nnz(seqmat)) */
3034     len = 0;
3035     for (i=owners[rank]; i<owners[rank+1]; i++) len += ai[i+1] - ai[i];
3036     ierr = GetMoreSpace((int)(size*len+1),&free_space);CHKERRQ(ierr);
3037     current_space = free_space;
3038 
3039     /* determine symbolic info for each row of B_seq */
3040     for (i=0;i<m;i++) {
3041       bnzi   = 0;
3042       /* add local non-zero cols of this proc's seqmat into lnk */
3043       arow   = owners[rank] + i;
3044       anzi   = ai[arow+1] - ai[arow];
3045       aj     = a->j + ai[arow];
3046       ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk);CHKERRQ(ierr);
3047       bnzi += nlnk;
3048       /* add received col data into lnk */
3049       for (k=0; k<merge->nrecv; k++){ /* k-th received message */
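        /* each received buffer holds the cumulative per-row nonzero counts in its first
           m entries, followed by the column indices of all rows */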
3050         /* i-th row */
3051         if (i == 0){
3052           anzi = *(ijbuf_r[k]+i);
3053           aj   = ijbuf_r[k] + m;
3054         } else {
3055           anzi = *(ijbuf_r[k]+i) - *(ijbuf_r[k]+i-1);
3056           aj   = ijbuf_r[k]+m + *(ijbuf_r[k]+i-1);
3057         }
3058         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk);CHKERRQ(ierr);
3059         bnzi += nlnk;
3060       }
3061 
3062       /* if free space is not available, make more free space */
3063       if (current_space->local_remaining<bnzi) {
3064         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3065         nspacedouble++;
3066       }
3067       /* copy data into free space, then initialize lnk */
3068       ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array);CHKERRQ(ierr);
3069       current_space->array           += bnzi;
3070       current_space->local_used      += bnzi;
3071       current_space->local_remaining -= bnzi;
3072 
3073       bi[i+1] = bi[i] + bnzi;
3074     }
3075     ierr = PetscMalloc((bi[m]+1)*sizeof(int),&bj);CHKERRQ(ierr);
3076     ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3077 
3078     ierr = PetscFree(lnk);CHKERRQ(ierr);
3079     ierr = PetscFree(len_r);CHKERRQ(ierr);
3080 
3081     /* allocate space for ba */
3082     ierr = PetscMalloc((bi[m]+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr);
3083     ierr = PetscMemzero(ba,(bi[m]+1)*sizeof(MatScalar));CHKERRQ(ierr);
3084 
3085     /* put together B_seq */
3086     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,bi,bj,ba,&B_seq);CHKERRQ(ierr);
3087     ierr = PetscFree(ba);CHKERRQ(ierr); /* bi and bj are saved for reuse */
3088 
3089     /* create symbolic parallel matrix B_mpi by concatenating B_seq */
3090     /*--------------------------------------------------------------*/
3091     ierr = MatMerge(comm,B_seq,scall,&B_mpi);CHKERRQ(ierr);
3092   } /* end of if (scall == MAT_INITIAL_MATRIX) */
3093 
3094   /* insert mat values of B_mpi */
3095   /*----------------------------*/
3096   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3097 
3098   /* set values of ba */
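  /* for each local row, bj_i holds the sorted union of all contributing column indices,
     so the local seqmat values and each received message are accumulated into ba_i by a
     single merge pass over bj_i and the contribution's (sorted) column indices */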
3099   for (i=0; i<m; i++) {
3100     arow = owners[rank] + i;
3101     bj_i = bj+bi[i];  /* col indices of the i-th row of B_seq */
3102     bnzi = bi[i+1] - bi[i];
3103     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3104 
3105     /* add local non-zero vals of this proc's seqmat into ba */
3106     anzi = ai[arow+1] - ai[arow];
3107     aj   = a->j + ai[arow];
3108     aa   = a->a + ai[arow];
3109     nextaj = 0;
3110     for (j=0; nextaj<anzi; j++){
3111       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3112         ba_i[j] += aa[nextaj++];
3113       }
3114     }
3115 
3116     /* add received vals into ba */
3117     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3118       /* i-th row */
3119       if (i == 0){
3120         anzi = *(ijbuf_r[k]+i);
3121         aj   = ijbuf_r[k] + m;
3122         aa   = abuf_r[k];
3123       } else {
3124         anzi = *(ijbuf_r[k]+i) - *(ijbuf_r[k]+i-1);
3125         aj   = ijbuf_r[k]+m + *(ijbuf_r[k]+i-1);
3126         aa   = abuf_r[k] + *(ijbuf_r[k]+i-1);
3127       }
3128       nextaj = 0;
3129       for (j=0; nextaj<anzi; j++){
3130         if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3131           ba_i[j] += aa[nextaj++];
3132         }
3133       }
3134     }
3135 
3136     ierr = MatSetValues(B_mpi,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3137   }
3138   ierr = MatAssemblyBegin(B_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3139   ierr = MatAssemblyEnd(B_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3140 
3141   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3142   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3143 
3144   /* attach the supporting struct to B_mpi for reuse */
3145   /*-------------------------------------------------*/
3146   if (scall == MAT_INITIAL_MATRIX){
3147     B_mpi->spptr         = (void*)merge;
3148     B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3149     merge->bi            = bi;
3150     merge->bj            = bj;
3151     merge->ijbuf_r       = ijbuf_r;
3152 
3153     *mpimat = B_mpi;
3154   }
3155   PetscFunctionReturn(0);
3156 }
3157