xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 09f3b4e5628a00a1eaf17d80982cfbcc515cc9c1)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash-table cost; without it it is not scalable (each processor
has an order N integer array, but lookup is fast).
*/
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
35 
36 
#define CHUNKSIZE   15
/*
   MatSetValues_SeqAIJ_A_Private - statement macro that inserts (or, with
   ADD_VALUES, adds) value at (row,col) of the "diagonal" sequential block A.

   Relies on variables set up by the caller (MatSetValues_MPIAIJ):
     rp1,ap1        - column indices / values of the current row of A
     nrow1,rmax1    - current and allocated length of that row
     low1,high1,lastcol1,t,_i,ii,N - search state and scratch
     nonew,ignorezeroentries       - insertion-policy flags
     a,aa,ai,aj,am,aimax,ailen     - the Mat_SeqAIJ and its arrays

   Strategy: a binary search narrows [low1,high1) -- reusing the previous
   window when columns arrive in increasing order (lastcol1) -- then a short
   linear scan finds the slot.  If the column is absent a new nonzero is
   created, reallocating the row via MatSeqXAIJReallocateAIJ when full and
   shifting the later entries of the row up by one slot.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else             low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) ap1[_i] += value;   \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
      if (nonew == 1) goto a_noinsert; \
      if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(a,1,nrow1,row,col,rmax1,aa,ai,aj,am,rp1,ap1,aimax,nonew); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
70 
71 
/*
   MatSetValues_SeqAIJ_B_Private - statement macro that inserts (or, with
   ADD_VALUES, adds) value at (row,col) of the "off-diagonal" sequential
   block B.  Mirror image of MatSetValues_SeqAIJ_A_Private; uses the
   *2-suffixed search-state variables and the b/ba/bi/bj/bm/bimax/bilen
   arrays of the B block, all set up by the caller (MatSetValues_MPIAIJ).
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else             low2  = t; \
    } \
       for (_i=low2; _i<high2; _i++) { \
        if (rp2[_i] > col) break; \
        if (rp2[_i] == col) { \
          if (addv == ADD_VALUES) ap2[_i] += value;   \
          else                    ap2[_i] = value; \
          goto b_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
      if (nonew == 1) goto b_noinsert; \
      if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(b,1,nrow2,row,col,rmax2,ba,bi,bj,bm,rp2,ap2,bimax,nonew); \
      N = nrow2++ - 1; b->nz++; high2++;\
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp2[ii+1] = rp2[ii]; \
        ap2[ii+1] = ap2[ii]; \
      } \
      rp2[_i] = col;  \
      ap2[_i] = value;  \
      b_noinsert: ; \
      bilen[row] = nrow2; \
}
104 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
/*
   Overwrites all stored values of one locally owned row.  v must contain
   exactly one value per existing nonzero of the row, ordered by global
   column: off-block entries left of the diagonal range, then the diagonal
   (A) block entries, then off-block entries to the right.  No new nonzeros
   are created.  row is a GLOBAL row number on entry.

   NOTE(review): relies on the row ownership start also bounding the column
   split (the "square matrices" comment below) -- confirm before reuse.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;   /* convert global row number to local */
  /* l = number of B-block entries whose global column precedes the diagonal range */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatSetValues_MPIAIJ"
135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
136 {
137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
138   PetscScalar    value;
139   PetscErrorCode ierr;
140   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
141   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
142   PetscTruth     roworiented = aij->roworiented;
143 
144   /* Some Variables required in the macro */
145   Mat            A = aij->A;
146   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
147   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
148   PetscScalar    *aa = a->a;
149   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
150   Mat            B = aij->B;
151   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
152   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
153   PetscScalar    *ba = b->a;
154 
155   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
156   PetscInt       nonew = a->nonew;
157   PetscScalar    *ap1,*ap2;
158 
159   PetscFunctionBegin;
160   for (i=0; i<m; i++) {
161     if (im[i] < 0) continue;
162 #if defined(PETSC_USE_DEBUG)
163     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
164 #endif
165     if (im[i] >= rstart && im[i] < rend) {
166       row      = im[i] - rstart;
167       lastcol1 = -1;
168       rp1      = aj + ai[row];
169       ap1      = aa + ai[row];
170       rmax1    = aimax[row];
171       nrow1    = ailen[row];
172       low1     = 0;
173       high1    = nrow1;
174       lastcol2 = -1;
175       rp2      = bj + bi[row];
176       ap2      = ba + bi[row];
177       rmax2    = bimax[row];
178       nrow2    = bilen[row];
179       low2     = 0;
180       high2    = nrow2;
181 
182       for (j=0; j<n; j++) {
183         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
184         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
185         if (in[j] >= cstart && in[j] < cend){
186           col = in[j] - cstart;
187           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
188         } else if (in[j] < 0) continue;
189 #if defined(PETSC_USE_DEBUG)
190         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
191 #endif
192         else {
193           if (mat->was_assembled) {
194             if (!aij->colmap) {
195               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
196             }
197 #if defined (PETSC_USE_CTABLE)
198             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
199 	    col--;
200 #else
201             col = aij->colmap[in[j]] - 1;
202 #endif
203             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
204               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
205               col =  in[j];
206               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
207               B = aij->B;
208               b = (Mat_SeqAIJ*)B->data;
209               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
210               rp2      = bj + bi[row];
211               ap2      = ba + bi[row];
212               rmax2    = bimax[row];
213               nrow2    = bilen[row];
214               low2     = 0;
215               high2    = nrow2;
216               bm       = aij->B->m;
217               ba = b->a;
218             }
219           } else col = in[j];
220           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
221         }
222       }
223     } else {
224       if (!aij->donotstash) {
225         if (roworiented) {
226           if (ignorezeroentries && v[i*n] == 0.0) continue;
227           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
228         } else {
229           if (ignorezeroentries && v[i] == 0.0) continue;
230           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
231         }
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
237 
238 
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
/*
   Retrieves an m x n block of values at global rows idxm[] / columns
   idxn[] into v (row-major).  Only rows owned by this process may be
   requested; entries absent from the nonzero pattern come back as 0.0.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* column lies in the diagonal (A) block */
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal (B) block: translate global column to B's local numbering */
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col --;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* column not in B's pattern => the value is implicitly zero */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
282 
283 #undef __FUNCT__
284 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
285 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
286 {
287   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
288   PetscErrorCode ierr;
289   PetscInt       nstash,reallocs;
290   InsertMode     addv;
291 
292   PetscFunctionBegin;
293   if (aij->donotstash) {
294     PetscFunctionReturn(0);
295   }
296 
297   /* make sure all processors are either in INSERTMODE or ADDMODE */
298   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
299   if (addv == (ADD_VALUES|INSERT_VALUES)) {
300     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
301   }
302   mat->insertmode = addv; /* in case this processor had no cache */
303 
304   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
305   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
306   ierr = PetscVerboseInfo((aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
307   PetscFunctionReturn(0);
308 }
309 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
/*
   Completes assembly: drains the stash (applying entries that arrived from
   other ranks), assembles the diagonal block, synchronizes a possible
   disassembly of the off-diagonal block across all ranks, builds the
   ghost-value scatter on the first final assembly, and assembles the
   off-diagonal block.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    /* receive and apply every batch of stashed off-process entries */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use     = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    /* product of was_assembled flags is zero iff some rank disassembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  /* reaccess the b because aij->B was changed in MatSetValues() or DisAssemble() */
  b    = (Mat_SeqAIJ *)aij->B->data;

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: set up lvec/Mvctx and compact B's columns */
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    /* MatGetRow() work space is invalidated by reassembly */
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

  PetscFunctionReturn(0);
}
379 
380 #undef __FUNCT__
381 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
382 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
383 {
384   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
385   PetscErrorCode ierr;
386 
387   PetscFunctionBegin;
388   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
389   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
390   PetscFunctionReturn(0);
391 }
392 
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
/*
   Zeros the given GLOBAL rows (which may be owned by any process) and
   optionally places diag on the diagonal of each zeroed row.  Row indices
   are first routed to their owning ranks with nonblocking point-to-point
   messages; each rank then zeros the rows it owns.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
  PetscInt       i,*owners = l->rowners;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /*  first count number of contributors to each processor */
  /* nprocs[2*j] = #rows destined for rank j; nprocs[2*j+1] = 1 if any */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  j = 0;
  for (i=0; i<N; i++) {
    /* restart the ownership scan only when indices are not ascending */
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  /* pack the row indices grouped by destination rank */
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  /* recompute starts -- the packing loop above advanced them */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  /* convert the received global row numbers to local row numbers */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->M == l->A->N)) {
    ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    /* rectangular diagonal block: set the diagonal entries one at a time */
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends so svalues may be safely freed */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
544 
545 #undef __FUNCT__
546 #define __FUNCT__ "MatMult_MPIAIJ"
547 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
548 {
549   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
550   PetscErrorCode ierr;
551   PetscInt       nt;
552 
553   PetscFunctionBegin;
554   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
555   if (nt != A->n) {
556     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
557   }
558   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
559   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
560   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
561   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
562   PetscFunctionReturn(0);
563 }
564 
565 #undef __FUNCT__
566 #define __FUNCT__ "MatMultAdd_MPIAIJ"
567 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
568 {
569   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
570   PetscErrorCode ierr;
571 
572   PetscFunctionBegin;
573   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
574   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
575   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
576   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
577   PetscFunctionReturn(0);
578 }
579 
580 #undef __FUNCT__
581 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
582 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
583 {
584   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
585   PetscErrorCode ierr;
586   PetscTruth     merged;
587 
588   PetscFunctionBegin;
589   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
590   /* do nondiagonal part */
591   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
592   if (!merged) {
593     /* send it on its way */
594     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
595     /* do local part */
596     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
597     /* receive remote parts: note this assumes the values are not actually */
598     /* added in yy until the next line, */
599     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
600   } else {
601     /* do local part */
602     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
603     /* send it on its way */
604     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
605     /* values actually were received in the Begin() but we need to call this nop */
606     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
607   }
608   PetscFunctionReturn(0);
609 }
610 
611 EXTERN_C_BEGIN
612 #undef __FUNCT__
613 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
614 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
615 {
616   MPI_Comm       comm;
617   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
618   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
619   IS             Me,Notme;
620   PetscErrorCode ierr;
621   PetscInt       M,N,first,last,*notme,i;
622   PetscMPIInt    size;
623 
624   PetscFunctionBegin;
625 
626   /* Easy test: symmetric diagonal block */
627   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
628   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
629   if (!*f) PetscFunctionReturn(0);
630   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
631   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
632   if (size == 1) PetscFunctionReturn(0);
633 
634   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
635   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
636   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
637   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
638   for (i=0; i<first; i++) notme[i] = i;
639   for (i=last; i<M; i++) notme[i-last+first] = i;
640   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
641   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
642   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
643   Aoff = Aoffs[0];
644   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
645   Boff = Boffs[0];
646   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
647   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
648   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
649   ierr = ISDestroy(Me);CHKERRQ(ierr);
650   ierr = ISDestroy(Notme);CHKERRQ(ierr);
651 
652   PetscFunctionReturn(0);
653 }
654 EXTERN_C_END
655 
656 #undef __FUNCT__
657 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
658 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
659 {
660   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
661   PetscErrorCode ierr;
662 
663   PetscFunctionBegin;
664   /* do nondiagonal part */
665   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
666   /* send it on its way */
667   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
668   /* do local part */
669   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
670   /* receive remote parts */
671   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
672   PetscFunctionReturn(0);
673 }
674 
675 /*
676   This only works correctly for square matrices where the subblock A->A is the
677    diagonal block
678 */
679 #undef __FUNCT__
680 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
681 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
682 {
683   PetscErrorCode ierr;
684   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
685 
686   PetscFunctionBegin;
687   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
688   if (a->rstart != a->cstart || a->rend != a->cend) {
689     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
690   }
691   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
692   PetscFunctionReturn(0);
693 }
694 
695 #undef __FUNCT__
696 #define __FUNCT__ "MatScale_MPIAIJ"
697 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
698 {
699   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
700   PetscErrorCode ierr;
701 
702   PetscFunctionBegin;
703   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
704   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
705   PetscFunctionReturn(0);
706 }
707 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
/*
   Releases everything owned by the MPIAIJ implementation: the stash, the
   row-ownership array, both sequential blocks, the column map, the ghost
   vector/scatter, MatGetRow work space, and the composed query functions.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
  /* colmap's representation depends on configuration: hash table vs. dense array */
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);

  /* detach the composed methods registered at creation time */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
743 
744 #undef __FUNCT__
745 #define __FUNCT__ "MatView_MPIAIJ_Binary"
746 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
747 {
748   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
749   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
750   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
751   PetscErrorCode    ierr;
752   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
753   int               fd;
754   PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
755   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
756   PetscScalar       *column_values;
757 
758   PetscFunctionBegin;
759   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
760   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
761   nz   = A->nz + B->nz;
762   if (!rank) {
763     header[0] = MAT_FILE_COOKIE;
764     header[1] = mat->M;
765     header[2] = mat->N;
766     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
767     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
768     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
769     /* get largest number of rows any processor has */
770     rlen = mat->m;
771     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
772     for (i=1; i<size; i++) {
773       rlen = PetscMax(rlen,range[i+1] - range[i]);
774     }
775   } else {
776     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
777     rlen = mat->m;
778   }
779 
780   /* load up the local row counts */
781   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
782   for (i=0; i<mat->m; i++) {
783     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
784   }
785 
786   /* store the row lengths to the file */
787   if (!rank) {
788     MPI_Status status;
789     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
790     for (i=1; i<size; i++) {
791       rlen = range[i+1] - range[i];
792       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
793       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
794     }
795   } else {
796     ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
797   }
798   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
799 
800   /* load up the local column indices */
801   nzmax = nz; /* )th processor needs space a largest processor needs */
802   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
803   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
804   cnt  = 0;
805   for (i=0; i<mat->m; i++) {
806     for (j=B->i[i]; j<B->i[i+1]; j++) {
807       if ( (col = garray[B->j[j]]) > cstart) break;
808       column_indices[cnt++] = col;
809     }
810     for (k=A->i[i]; k<A->i[i+1]; k++) {
811       column_indices[cnt++] = A->j[k] + cstart;
812     }
813     for (; j<B->i[i+1]; j++) {
814       column_indices[cnt++] = garray[B->j[j]];
815     }
816   }
817   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
818 
819   /* store the column indices to the file */
820   if (!rank) {
821     MPI_Status status;
822     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
823     for (i=1; i<size; i++) {
824       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
825       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
826       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
827       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
828     }
829   } else {
830     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
831     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
832   }
833   ierr = PetscFree(column_indices);CHKERRQ(ierr);
834 
835   /* load up the local column values */
836   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
837   cnt  = 0;
838   for (i=0; i<mat->m; i++) {
839     for (j=B->i[i]; j<B->i[i+1]; j++) {
840       if ( garray[B->j[j]] > cstart) break;
841       column_values[cnt++] = B->a[j];
842     }
843     for (k=A->i[i]; k<A->i[i+1]; k++) {
844       column_values[cnt++] = A->a[k];
845     }
846     for (; j<B->i[i+1]; j++) {
847       column_values[cnt++] = B->a[j];
848     }
849   }
850   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
851 
852   /* store the column values to the file */
853   if (!rank) {
854     MPI_Status status;
855     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
856     for (i=1; i<size; i++) {
857       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
858       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
859       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
860       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
861     }
862   } else {
863     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
864     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
865   }
866   ierr = PetscFree(column_values);CHKERRQ(ierr);
867   PetscFunctionReturn(0);
868 }
869 
870 #undef __FUNCT__
871 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
872 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
873 {
874   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
875   PetscErrorCode    ierr;
876   PetscMPIInt       rank = aij->rank,size = aij->size;
877   PetscTruth        isdraw,iascii,isbinary;
878   PetscViewer       sviewer;
879   PetscViewerFormat format;
880 
881   PetscFunctionBegin;
882   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
883   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
884   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
885   if (iascii) {
886     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
887     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
888       MatInfo    info;
889       PetscTruth inodes;
890 
891       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
892       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
893       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
894       if (!inodes) {
895         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
896 					      rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
897       } else {
898         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
899 		    rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
900       }
901       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
902       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
903       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
904       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
905       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
906       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
907       PetscFunctionReturn(0);
908     } else if (format == PETSC_VIEWER_ASCII_INFO) {
909       PetscInt   inodecount,inodelimit,*inodes;
910       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
911       if (inodes) {
912         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
913       } else {
914         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
915       }
916       PetscFunctionReturn(0);
917     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
918       PetscFunctionReturn(0);
919     }
920   } else if (isbinary) {
921     if (size == 1) {
922       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
923       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
924     } else {
925       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
926     }
927     PetscFunctionReturn(0);
928   } else if (isdraw) {
929     PetscDraw  draw;
930     PetscTruth isnull;
931     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
932     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
933   }
934 
935   if (size == 1) {
936     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
937     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
938   } else {
939     /* assemble the entire matrix onto first processor. */
940     Mat         A;
941     Mat_SeqAIJ  *Aloc;
942     PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
943     PetscScalar *a;
944 
945     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
946     if (!rank) {
947       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
948     } else {
949       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
950     }
951     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
952     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
953     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
954     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
955 
956     /* copy over the A part */
957     Aloc = (Mat_SeqAIJ*)aij->A->data;
958     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
959     row = aij->rstart;
960     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
961     for (i=0; i<m; i++) {
962       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
963       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
964     }
965     aj = Aloc->j;
966     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
967 
968     /* copy over the B part */
969     Aloc = (Mat_SeqAIJ*)aij->B->data;
970     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
971     row  = aij->rstart;
972     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
973     ct   = cols;
974     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
975     for (i=0; i<m; i++) {
976       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
977       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
978     }
979     ierr = PetscFree(ct);CHKERRQ(ierr);
980     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
981     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
982     /*
983        Everyone has to call to draw the matrix since the graphics waits are
984        synchronized across all processors that share the PetscDraw object
985     */
986     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
987     if (!rank) {
988       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
989       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
990     }
991     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
992     ierr = MatDestroy(A);CHKERRQ(ierr);
993   }
994   PetscFunctionReturn(0);
995 }
996 
997 #undef __FUNCT__
998 #define __FUNCT__ "MatView_MPIAIJ"
999 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1000 {
1001   PetscErrorCode ierr;
1002   PetscTruth     iascii,isdraw,issocket,isbinary;
1003 
1004   PetscFunctionBegin;
1005   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1006   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1007   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1008   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1009   if (iascii || isdraw || isbinary || issocket) {
1010     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1011   } else {
1012     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1013   }
1014   PetscFunctionReturn(0);
1015 }
1016 
1017 
1018 
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
/*
   SOR/relaxation for the MPIAIJ format.  Only the "local" variants are
   supported: each process relaxes with its diagonal block A, folding the
   off-process coupling B*x into the right-hand side (block Jacobi between
   processes with SOR inside each block).  Global parallel SOR errors out.

   matin  - the matrix
   bb     - right-hand side
   omega  - relaxation factor
   flag   - SOR variant; must include one of the SOR_LOCAL_* flags
   fshift - diagonal shift
   its    - number of outer iterations (must be > 0)
   lits   - number of local iterations per outer iteration (must be > 0)
   xx     - solution vector (input and output)
*/
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;  /* work vector holding bb - B*x */

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero initial guess B*x is zero, so the first sweep needs no
         communication and can use bb directly */
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      /* gather the ghost values of xx needed to apply the off-diagonal block B */
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* NOTE(review): PETSC_NULL (i.e. 0) is passed for the last-its argument
         here, whereas the symmetric branch passes lits -- verify this is what
         the SeqAIJ relax implementation expects for one-directional sweeps */
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1091 
1092 #undef __FUNCT__
1093 #define __FUNCT__ "MatPermute_MPIAIJ"
1094 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1095 {
1096   MPI_Comm       comm,pcomm;
1097   PetscInt       first,local_size,nrows,*rows;
1098   int            ntids;
1099   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1100   PetscErrorCode ierr;
1101 
1102   PetscFunctionBegin;
1103   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1104   /* make a collective version of 'rowp' */
1105   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1106   if (pcomm==comm) {
1107     crowp = rowp;
1108   } else {
1109     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1110     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1111     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1112     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1113   }
1114   /* collect the global row permutation and invert it */
1115   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1116   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1117   if (pcomm!=comm) {
1118     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1119   }
1120   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1121   /* get the local target indices */
1122   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1123   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1124   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1125   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1126   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1127   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1128   /* the column permutation is so much easier;
1129      make a local version of 'colp' and invert it */
1130   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1131   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1132   if (ntids==1) {
1133     lcolp = colp;
1134   } else {
1135     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1136     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1137     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1138   }
1139   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1140   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1141   if (ntids>1) {
1142     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1143     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1144   }
1145   /* now we just get the submatrix */
1146   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1147   /* clean up */
1148   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1149   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1150   PetscFunctionReturn(0);
1151 }
1152 
1153 #undef __FUNCT__
1154 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1155 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1156 {
1157   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1158   Mat            A = mat->A,B = mat->B;
1159   PetscErrorCode ierr;
1160   PetscReal      isend[5],irecv[5];
1161 
1162   PetscFunctionBegin;
1163   info->block_size     = 1.0;
1164   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1165   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1166   isend[3] = info->memory;  isend[4] = info->mallocs;
1167   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1168   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1169   isend[3] += info->memory;  isend[4] += info->mallocs;
1170   if (flag == MAT_LOCAL) {
1171     info->nz_used      = isend[0];
1172     info->nz_allocated = isend[1];
1173     info->nz_unneeded  = isend[2];
1174     info->memory       = isend[3];
1175     info->mallocs      = isend[4];
1176   } else if (flag == MAT_GLOBAL_MAX) {
1177     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1178     info->nz_used      = irecv[0];
1179     info->nz_allocated = irecv[1];
1180     info->nz_unneeded  = irecv[2];
1181     info->memory       = irecv[3];
1182     info->mallocs      = irecv[4];
1183   } else if (flag == MAT_GLOBAL_SUM) {
1184     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1185     info->nz_used      = irecv[0];
1186     info->nz_allocated = irecv[1];
1187     info->nz_unneeded  = irecv[2];
1188     info->memory       = irecv[3];
1189     info->mallocs      = irecv[4];
1190   }
1191   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1192   info->fill_ratio_needed = 0;
1193   info->factor_mallocs    = 0;
1194   info->rows_global       = (double)matin->M;
1195   info->columns_global    = (double)matin->N;
1196   info->rows_local        = (double)matin->m;
1197   info->columns_local     = (double)matin->N;
1198 
1199   PetscFunctionReturn(0);
1200 }
1201 
1202 #undef __FUNCT__
1203 #define __FUNCT__ "MatSetOption_MPIAIJ"
1204 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1205 {
1206   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1207   PetscErrorCode ierr;
1208 
1209   PetscFunctionBegin;
1210   switch (op) {
1211   case MAT_NO_NEW_NONZERO_LOCATIONS:
1212   case MAT_YES_NEW_NONZERO_LOCATIONS:
1213   case MAT_COLUMNS_UNSORTED:
1214   case MAT_COLUMNS_SORTED:
1215   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1216   case MAT_KEEP_ZEROED_ROWS:
1217   case MAT_NEW_NONZERO_LOCATION_ERR:
1218   case MAT_USE_INODES:
1219   case MAT_DO_NOT_USE_INODES:
1220   case MAT_IGNORE_ZERO_ENTRIES:
1221     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1222     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1223     break;
1224   case MAT_ROW_ORIENTED:
1225     a->roworiented = PETSC_TRUE;
1226     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1227     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1228     break;
1229   case MAT_ROWS_SORTED:
1230   case MAT_ROWS_UNSORTED:
1231   case MAT_YES_NEW_DIAGONALS:
1232     ierr = PetscVerboseInfo((A,"MatSetOption_MPIAIJ:Option ignored\n"));CHKERRQ(ierr);
1233     break;
1234   case MAT_COLUMN_ORIENTED:
1235     a->roworiented = PETSC_FALSE;
1236     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1237     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1238     break;
1239   case MAT_IGNORE_OFF_PROC_ENTRIES:
1240     a->donotstash = PETSC_TRUE;
1241     break;
1242   case MAT_NO_NEW_DIAGONALS:
1243     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1244   case MAT_SYMMETRIC:
1245   case MAT_STRUCTURALLY_SYMMETRIC:
1246   case MAT_HERMITIAN:
1247   case MAT_SYMMETRY_ETERNAL:
1248     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1249     break;
1250   case MAT_NOT_SYMMETRIC:
1251   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1252   case MAT_NOT_HERMITIAN:
1253   case MAT_NOT_SYMMETRY_ETERNAL:
1254     break;
1255   default:
1256     SETERRQ(PETSC_ERR_SUP,"unknown option");
1257   }
1258   PetscFunctionReturn(0);
1259 }
1260 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
/*
   Returns one locally-owned row of the parallel matrix with global column
   indices, merging the row of the diagonal block A with the row of the
   off-diagonal block B so the columns come out sorted (assuming each block
   stores its row sorted).  Values/indices point into a per-matrix work
   buffer, so only one row may be "gotten" at a time; MatRestoreRow_MPIAIJ()
   clears the flag.

   matin - the matrix
   row   - global row number (must lie in [rstart,rend))
   nz    - number of nonzeros in the row
   idx   - global column indices (pass PETSC_NULL if not wanted)
   v     - values (pass PETSC_NULL if not wanted)
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt     max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation: values first, indices in the tail of the same block */
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  /* only request from A/B the pieces the caller actually asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;  /* maps B's compressed column indices to global columns */
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column lies left of the
         diagonal block; those come first, then all of A, then the rest of B */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* the split point was already found while filling the values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  /* the merged copies live in mat->rowvalues/rowindices, so A's and B's rows
     can be returned immediately */
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1341 
1342 #undef __FUNCT__
1343 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1344 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1345 {
1346   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1347 
1348   PetscFunctionBegin;
1349   if (!aij->getrowactive) {
1350     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1351   }
1352   aij->getrowactive = PETSC_FALSE;
1353   PetscFunctionReturn(0);
1354 }
1355 
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/*
   Computes a matrix norm by combining the nonzeros of the local diagonal
   (A) and off-diagonal (B) blocks, then reducing across the communicator:
     NORM_FROBENIUS - sum of squares, MPI_SUM reduce, sqrt
     NORM_1         - per-(global)-column absolute sums, MPI_SUM, then max
     NORM_INFINITY  - per-row absolute sums, MPI_MAX
   The two-norm is not supported.  On one process the work is delegated to
   the sequential block's MatNorm().
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = aij->cstart;
  PetscReal      sum = 0.0;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* accumulate |a_ij|^2 over both local blocks */
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      /* tmp holds this process's contribution to every global column sum;
         note the O(N)-per-process storage */
      PetscReal *tmp,*tmp2;
      PetscInt    *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      /* A stores block-local columns: offset by cstart to get global columns */
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
      }
      /* B stores compressed columns: translate through garray */
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      /* rows are not split across processes, so a local row max then a
         global MPI_MAX suffices */
      for (j=0; j<aij->A->m; j++) {
        v = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
1432 
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
   Forms the transpose of a parallel AIJ matrix by creating a new matrix of
   the transposed sizes and inserting each local row of A and B as a column
   (MatSetValues with 1 column and many rows).  With matout == PETSC_NULL
   the transpose replaces A in place, which is only allowed for square
   matrices.

   NOTE: the diagonal block's column indices Aloc->j are temporarily shifted
   to global numbering in place and must be restored exactly afterwards.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat            B;
  PetscScalar    *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  /* B has swapped local and global dimensions; preallocation is left to grow */
  ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->n,A->m,N,M);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  /* shift local column indices to global numbering, in place */
  for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
  for (i=0; i<m; i++) {
    /* insert row 'row' of A as column 'row' of B */
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  /* undo the in-place shift (aj was advanced above, so reload it first) */
  aj = Aloc->j;
  for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = a->rstart;
  ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ct   = cols;  /* keep the base pointer for the free; 'cols' is advanced below */
  /* translate compressed off-diagonal columns to global numbering via garray */
  for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    /* in-place: replace A's guts with B's and destroy the shell of B */
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1487 
1488 #undef __FUNCT__
1489 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1490 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1491 {
1492   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1493   Mat            a = aij->A,b = aij->B;
1494   PetscErrorCode ierr;
1495   PetscInt       s1,s2,s3;
1496 
1497   PetscFunctionBegin;
1498   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1499   if (rr) {
1500     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1501     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1502     /* Overlap communication with computation. */
1503     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1504   }
1505   if (ll) {
1506     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1507     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1508     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1509   }
1510   /* scale  the diagonal block */
1511   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1512 
1513   if (rr) {
1514     /* Do a scatter end and then right scale the off-diagonal block */
1515     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1516     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1517   }
1518 
1519   PetscFunctionReturn(0);
1520 }
1521 
1522 
1523 #undef __FUNCT__
1524 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1525 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1526 {
1527   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1528   PetscErrorCode ierr;
1529 
1530   PetscFunctionBegin;
1531   if (!a->rank) {
1532     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1533   }
1534   PetscFunctionReturn(0);
1535 }
1536 
1537 #undef __FUNCT__
1538 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1539 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1540 {
1541   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1542   PetscErrorCode ierr;
1543 
1544   PetscFunctionBegin;
1545   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1546   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1547   PetscFunctionReturn(0);
1548 }
1549 #undef __FUNCT__
1550 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1551 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1552 {
1553   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1554   PetscErrorCode ierr;
1555 
1556   PetscFunctionBegin;
1557   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1558   PetscFunctionReturn(0);
1559 }
1560 
1561 #undef __FUNCT__
1562 #define __FUNCT__ "MatEqual_MPIAIJ"
1563 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1564 {
1565   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1566   Mat            a,b,c,d;
1567   PetscTruth     flg;
1568   PetscErrorCode ierr;
1569 
1570   PetscFunctionBegin;
1571   a = matA->A; b = matA->B;
1572   c = matB->A; d = matB->B;
1573 
1574   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1575   if (flg) {
1576     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1577   }
1578   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1579   PetscFunctionReturn(0);
1580 }
1581 
1582 #undef __FUNCT__
1583 #define __FUNCT__ "MatCopy_MPIAIJ"
1584 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1585 {
1586   PetscErrorCode ierr;
1587   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1588   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1589 
1590   PetscFunctionBegin;
1591   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1592   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1593     /* because of the column compression in the off-processor part of the matrix a->B,
1594        the number of columns in a->B and b->B may be different, hence we cannot call
1595        the MatCopy() directly on the two parts. If need be, we can provide a more
1596        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1597        then copying the submatrices */
1598     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1599   } else {
1600     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1601     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1602   }
1603   PetscFunctionReturn(0);
1604 }
1605 
1606 #undef __FUNCT__
1607 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1608 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1609 {
1610   PetscErrorCode ierr;
1611 
1612   PetscFunctionBegin;
1613   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1614   PetscFunctionReturn(0);
1615 }
1616 
1617 #include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
/*
   MatAXPY_MPIAIJ - Computes Y = a*X + Y for MPIAIJ matrices.

   Three strategies depending on how the nonzero patterns relate:
   - SAME_NONZERO_PATTERN: the value arrays of X and Y line up
     entry-for-entry, so a single BLAS axpy per block suffices.
   - SUBSET_NONZERO_PATTERN: X's nonzeros are a subset of Y's; an
     index map xtoy (cached on Y's off-diagonal block) scatters X's
     values into the right slots of Y.
   - otherwise: fall back to the generic MatAXPY_Basic().
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* diagonal blocks: y->a += alpha * x->a over all nz entries */
    x = (Mat_SeqAIJ *)xx->A->data;
    y = (Mat_SeqAIJ *)yy->A->data;
    bnz = (PetscBLASInt)x->nz;
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
    /* off-diagonal blocks, same treatment */
    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    bnz = (PetscBLASInt)x->nz;
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) {
    ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    /* the cached xtoy map is only valid for the X it was built against;
       discard it if this call uses a different X->B */
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      y->XtoY = xx->B;
    }
    /* scatter-add X's off-diagonal values into Y through the index map */
    for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
  } else {
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1658 
1659 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1660 
1661 #undef __FUNCT__
1662 #define __FUNCT__ "MatConjugate_MPIAIJ"
1663 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1664 {
1665 #if defined(PETSC_USE_COMPLEX)
1666   PetscErrorCode ierr;
1667   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1668 
1669   PetscFunctionBegin;
1670   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1671   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1672 #else
1673   PetscFunctionBegin;
1674 #endif
1675   PetscFunctionReturn(0);
1676 }
1677 
1678 #undef __FUNCT__
1679 #define __FUNCT__ "MatRealPart_MPIAIJ"
1680 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1681 {
1682   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1683   PetscErrorCode ierr;
1684 
1685   PetscFunctionBegin;
1686   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1687   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1688   PetscFunctionReturn(0);
1689 }
1690 
1691 #undef __FUNCT__
1692 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1693 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1694 {
1695   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1696   PetscErrorCode ierr;
1697 
1698   PetscFunctionBegin;
1699   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1700   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1701   PetscFunctionReturn(0);
1702 }
1703 
1704 /* -------------------------------------------------------------------*/
/*
   Operation (dispatch) table for the MPIAIJ matrix type.  Entries are
   POSITIONAL: the numbered comment markers give the slot index within
   struct _MatOps, and a 0 means that operation is unsupported.  Never
   reorder entries; a new operation must go in its exact numbered slot.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
/* 4*/ MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
/*15*/ MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
/*20*/ MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
/*25*/ MatZeroRows_MPIAIJ,
       0,
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIAIJ,
       0,
       0,
       0,
       0,
/*35*/ MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
/*40*/ MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
/*45*/ MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
/*50*/ MatSetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
/*55*/ MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       MatPermute_MPIAIJ,
       0,
/*60*/ MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       MatGetPetscMaps_Petsc,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       MatSetColoring_MPIAIJ,
#if defined(PETSC_HAVE_ADIC)
       MatSetValuesAdic_MPIAIJ,
#else
       0,
#endif
       MatSetValuesAdifor_MPIAIJ,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*84*/ MatLoad_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
/*90*/ MatMatMult_MPIAIJ_MPIAIJ,
       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
       MatMatMultNumeric_MPIAIJ_MPIAIJ,
       MatPtAP_Basic,
       MatPtAPSymbolic_MPIAIJ,
/*95*/ MatPtAPNumeric_MPIAIJ,
       0,
       0,
       0,
       0,
/*100*/0,
       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
       MatPtAPNumeric_MPIAIJ_MPIAIJ,
       MatConjugate_MPIAIJ,
       0,
/*105*/MatSetValuesRow_MPIAIJ,
       MatRealPart_MPIAIJ,
       MatImaginaryPart_MPIAIJ};
1817 
1818 /* ----------------------------------------------------------------------------------------*/
1819 
1820 EXTERN_C_BEGIN
1821 #undef __FUNCT__
1822 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1823 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
1824 {
1825   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1826   PetscErrorCode ierr;
1827 
1828   PetscFunctionBegin;
1829   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1830   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1831   PetscFunctionReturn(0);
1832 }
1833 EXTERN_C_END
1834 
1835 EXTERN_C_BEGIN
1836 #undef __FUNCT__
1837 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1838 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
1839 {
1840   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1841   PetscErrorCode ierr;
1842 
1843   PetscFunctionBegin;
1844   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1845   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1846   PetscFunctionReturn(0);
1847 }
1848 EXTERN_C_END
1849 
1850 #include "petscpc.h"
1851 EXTERN_C_BEGIN
1852 #undef __FUNCT__
1853 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1854 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1855 {
1856   Mat_MPIAIJ     *b;
1857   PetscErrorCode ierr;
1858   PetscInt       i;
1859 
1860   PetscFunctionBegin;
1861   B->preallocated = PETSC_TRUE;
1862   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1863   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1864   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1865   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1866   if (d_nnz) {
1867     for (i=0; i<B->m; i++) {
1868       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1869     }
1870   }
1871   if (o_nnz) {
1872     for (i=0; i<B->m; i++) {
1873       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1874     }
1875   }
1876   b = (Mat_MPIAIJ*)B->data;
1877   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1878   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1879 
1880   PetscFunctionReturn(0);
1881 }
1882 EXTERN_C_END
1883 
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
/*
   MatDuplicate_MPIAIJ - Creates a new MPIAIJ matrix with the same
   parallel layout as matin, optionally copying the values (cpvalues).
   Copies the ownership/layout bookkeeping directly, deep-copies the
   colmap and garray lookup structures, duplicates the communication
   vector/scatter, and duplicates the two sequential blocks.
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat       = 0;
  /* create a fresh matrix of the same type and sizes; MatSetType builds
     default inner objects that are replaced below */
  ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;

  /* scalar state copied field by field */
  mat->factor       = matin->factor;
  mat->bs           = matin->bs;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-call GetRow scratch is not copied; it is rebuilt on demand */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  /* deep-copy the global-to-local column map, whose representation
     depends on PETSC_USE_CTABLE (hash table vs. length-N array) */
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  /* deep-copy the local-to-global column array of the off-diagonal block */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->n;
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  /* duplicate the ghost vector and its scatter context */
  ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
  /* replace the default inner blocks with duplicates of the originals */
  ierr =  MatDestroy(a->A);CHKERRQ(ierr);
  ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr =  MatDestroy(a->B);CHKERRQ(ierr);
  ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
1951 
1952 #include "petscsys.h"
1953 
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
/*
   MatLoad_MPIAIJ - Loads a matrix stored in PETSc binary format into a
   newly created parallel AIJ matrix.

   Protocol: process 0 owns the file descriptor and reads everything
   (header, row lengths, column indices, values); each other process's
   share is shipped to it with point-to-point messages.  Rows are
   distributed nearly evenly (M/size, with the first M%size processes
   getting one extra row).
*/
PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
  PetscInt       i,nz,j,rstart,rend,mmax;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners;
  int            fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  /* only process 0 touches the file; it validates the magic cookie */
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  if (!rank) {
    mmax       = rowners[1];
    for (i=2; i<size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
  } else mmax = m;

  /* turn rowners into a prefix sum so rowners[r]..rowners[r+1] is
     process r's row range; the max is taken before each slot is summed */
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    mmax       = PetscMax(mmax,rowners[i]);
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix; ourlens is temporarily reduced to just the
     diagonal-block counts needed by the preallocation call */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);

  /* file stores each row's columns sorted, which allows the faster
     insertion path; restore ourlens to full row lengths afterwards */
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = A;
  PetscFunctionReturn(0);
}
2149 
2150 #undef __FUNCT__
2151 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2152 /*
    Not great since it makes two copies of the submatrix: first a SeqAIJ
  matrix locally, and then the end result by concatenating the local matrices.
2155   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2156 */
/*
   MatGetSubMatrix_MPIAIJ - Extracts the parallel submatrix defined by
   the row IS isrow and column IS iscol.

   Phase 1 gathers each process's rows into a sequential matrix Mreuse
   via MatGetSubMatrices().  Phase 2 assembles the parallel result M from
   Mreuse's CSR arrays.  On MAT_INITIAL_MATRIX, M is created with exact
   preallocation computed from Mreuse and Mreuse is cached on M (key
   "SubMatrix") so a later MAT_REUSE_MATRIX call can refill it in place.

   csize is the local column size of the result, or PETSC_DECIDE.
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  PetscScalar    *vwork,*aa;
  MPI_Comm       comm = mat->comm;
  Mat_SeqAIJ     *aij;


  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call ==  MAT_REUSE_MATRIX) {
    /* refill the cached sequential submatrix rather than building a new one */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
	nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum of nlocal gives this process's column ownership range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
    }

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  /* walk Mreuse's CSR arrays and insert row by row into M */
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
2276 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
/*
   MatMPIAIJSetPreallocationCSR_MPIAIJ - Preallocates and fills B from
   local CSR arrays (I row pointers, J column indices, optional values v).

   For each local row the columns are split into the diagonal range
   [cstart,cend) and the rest to build exact d_nnz/o_nnz counts; this
   relies on each row's columns being sorted (see the manual page).
   When v is NULL the structure is inserted with explicit zeros.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
{
  Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
  PetscInt       m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (I[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"I[0] must be 0 it is %D",I[0]);
  /* single allocation: d_nnz is the first half, o_nnz the second */
  ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
  o_nnz = d_nnz + m;

  for (i=0; i<m; i++) {
    nnz     = I[i+1]- I[i];
    JJ      = J + I[i];
    nnz_max = PetscMax(nnz_max,nnz);
    if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
    /* skip columns before the diagonal range (sorted columns assumed) */
    for (j=0; j<nnz; j++) {
      if (*JJ >= cstart) break;
      JJ++;
    }
    /* count columns inside [cstart,cend): the diagonal-block entries */
    d = 0;
    for (; j<nnz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree(d_nnz);CHKERRQ(ierr);

  /* with no values supplied, insert zeros so the structure is assembled */
  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* sorted-column hint enables the faster insertion path; it is turned
     off again after assembly */
  ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = I[i+1]- I[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END
2336 
2337 #undef __FUNCT__
2338 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2339 /*@
2340    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2341    (the default parallel PETSc format).
2342 
2343    Collective on MPI_Comm
2344 
2345    Input Parameters:
2346 +  B - the matrix
2347 .  i - the indices into j for the start of each local row (starts with zero)
2348 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2349 -  v - optional values in the matrix
2350 
2351    Level: developer
2352 
2353 .keywords: matrix, aij, compressed row, sparse, parallel
2354 
2355 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2356 @*/
2357 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2358 {
2359   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2360 
2361   PetscFunctionBegin;
2362   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2363   if (f) {
2364     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2365   }
2366   PetscFunctionReturn(0);
2367 }
2368 
2369 #undef __FUNCT__
2370 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2371 /*@C
2372    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2373    (the default parallel PETSc format).  For good matrix assembly performance
2374    the user should preallocate the matrix storage by setting the parameters
2375    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2376    performance can be increased by more than a factor of 50.
2377 
2378    Collective on MPI_Comm
2379 
2380    Input Parameters:
2381 +  A - the matrix
2382 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2383            (same value is used for all local rows)
2384 .  d_nnz - array containing the number of nonzeros in the various rows of the
2385            DIAGONAL portion of the local submatrix (possibly different for each row)
2386            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2387            The size of this array is equal to the number of local rows, i.e 'm'.
2388            You must leave room for the diagonal entry even if it is zero.
2389 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2390            submatrix (same value is used for all local rows).
2391 -  o_nnz - array containing the number of nonzeros in the various rows of the
2392            OFF-DIAGONAL portion of the local submatrix (possibly different for
2393            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2394            structure. The size of this array is equal to the number
2395            of local rows, i.e 'm'.
2396 
2397    If the *_nnz parameter is given then the *_nz parameter is ignored
2398 
2399    The AIJ format (also called the Yale sparse matrix format or
2400    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2401    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2402 
2403    The parallel matrix is partitioned such that the first m0 rows belong to
2404    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2405    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2406 
   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2413 
2414    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2415 
2416    Example usage:
2417 
2418    Consider the following 8x8 matrix with 34 non-zero values, that is
2419    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2420    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2421    as follows:
2422 
2423 .vb
2424             1  2  0  |  0  3  0  |  0  4
2425     Proc0   0  5  6  |  7  0  0  |  8  0
2426             9  0 10  | 11  0  0  | 12  0
2427     -------------------------------------
2428            13  0 14  | 15 16 17  |  0  0
2429     Proc1   0 18  0  | 19 20 21  |  0  0
2430             0  0  0  | 22 23  0  | 24  0
2431     -------------------------------------
2432     Proc2  25 26 27  |  0  0 28  | 29  0
2433            30  0  0  | 31 32 33  |  0 34
2434 .ve
2435 
2436    This can be represented as a collection of submatrices as:
2437 
2438 .vb
2439       A B C
2440       D E F
2441       G H I
2442 .ve
2443 
2444    Where the submatrices A,B,C are owned by proc0, D,E,F are
2445    owned by proc1, G,H,I are owned by proc2.
2446 
2447    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2448    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2449    The 'M','N' parameters are 8,8, and have the same values on all procs.
2450 
2451    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2452    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2453    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
2457 
2458    When d_nz, o_nz parameters are specified, d_nz storage elements are
2459    allocated for every row of the local diagonal submatrix, and o_nz
2460    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
2463    In this case, the values of d_nz,o_nz are:
2464 .vb
2465      proc0 : dnz = 2, o_nz = 2
2466      proc1 : dnz = 3, o_nz = 2
2467      proc2 : dnz = 1, o_nz = 4
2468 .ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.
2473 
   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2476    In the above case the values for d_nnz,o_nnz are:
2477 .vb
2478      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2479      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2480      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2481 .ve
2482    Here the space allocated is sum of all the above values i.e 34, and
2483    hence pre-allocation is perfect.
2484 
2485    Level: intermediate
2486 
2487 .keywords: matrix, aij, compressed row, sparse, parallel
2488 
2489 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2490           MPIAIJ
2491 @*/
2492 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2493 {
2494   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2495 
2496   PetscFunctionBegin;
2497   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2498   if (f) {
2499     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2500   }
2501   PetscFunctionReturn(0);
2502 }
2503 
2504 #undef __FUNCT__
2505 #define __FUNCT__ "MatCreateMPIAIJ"
2506 /*@C
2507    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2508    (the default parallel PETSc format).  For good matrix assembly performance
2509    the user should preallocate the matrix storage by setting the parameters
2510    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2511    performance can be increased by more than a factor of 50.
2512 
2513    Collective on MPI_Comm
2514 
2515    Input Parameters:
2516 +  comm - MPI communicator
2517 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2518            This value should be the same as the local size used in creating the
2519            y vector for the matrix-vector product y = Ax.
2520 .  n - This value should be the same as the local size used in creating the
2521        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2522        calculated if N is given) For square matrices n is almost always m.
2523 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2524 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2525 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2526            (same value is used for all local rows)
2527 .  d_nnz - array containing the number of nonzeros in the various rows of the
2528            DIAGONAL portion of the local submatrix (possibly different for each row)
2529            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2530            The size of this array is equal to the number of local rows, i.e 'm'.
2531            You must leave room for the diagonal entry even if it is zero.
2532 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2533            submatrix (same value is used for all local rows).
2534 -  o_nnz - array containing the number of nonzeros in the various rows of the
2535            OFF-DIAGONAL portion of the local submatrix (possibly different for
2536            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2537            structure. The size of this array is equal to the number
2538            of local rows, i.e 'm'.
2539 
2540    Output Parameter:
2541 .  A - the matrix
2542 
2543    Notes:
2544    If the *_nnz parameter is given then the *_nz parameter is ignored
2545 
2546    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2547    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2548    storage requirements for this matrix.
2549 
2550    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
2551    processor than it must be used on all processors that share the object for
2552    that argument.
2553 
2554    The user MUST specify either the local or global matrix dimensions
2555    (possibly both).
2556 
2557    The parallel matrix is partitioned such that the first m0 rows belong to
2558    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2559    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2560 
   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2567 
2568    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2569 
2570    When calling this routine with a single process communicator, a matrix of
2571    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2572    type of communicator, use the construction mechanism:
2573      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
2574 
2575    By default, this format uses inodes (identical nodes) when possible.
2576    We search for consecutive rows with the same nonzero structure, thereby
2577    reusing matrix information to achieve increased efficiency.
2578 
2579    Options Database Keys:
2580 +  -mat_no_inode  - Do not use inodes
2581 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
2582 -  -mat_aij_oneindex - Internally use indexing starting at 1
2583         rather than 0.  Note that when calling MatSetValues(),
2584         the user still MUST index entries starting at 0!
2585 
2586 
2587    Example usage:
2588 
2589    Consider the following 8x8 matrix with 34 non-zero values, that is
2590    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2591    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2592    as follows:
2593 
2594 .vb
2595             1  2  0  |  0  3  0  |  0  4
2596     Proc0   0  5  6  |  7  0  0  |  8  0
2597             9  0 10  | 11  0  0  | 12  0
2598     -------------------------------------
2599            13  0 14  | 15 16 17  |  0  0
2600     Proc1   0 18  0  | 19 20 21  |  0  0
2601             0  0  0  | 22 23  0  | 24  0
2602     -------------------------------------
2603     Proc2  25 26 27  |  0  0 28  | 29  0
2604            30  0  0  | 31 32 33  |  0 34
2605 .ve
2606 
2607    This can be represented as a collection of submatrices as:
2608 
2609 .vb
2610       A B C
2611       D E F
2612       G H I
2613 .ve
2614 
2615    Where the submatrices A,B,C are owned by proc0, D,E,F are
2616    owned by proc1, G,H,I are owned by proc2.
2617 
2618    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2619    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2620    The 'M','N' parameters are 8,8, and have the same values on all procs.
2621 
2622    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2623    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2624    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
2628 
2629    When d_nz, o_nz parameters are specified, d_nz storage elements are
2630    allocated for every row of the local diagonal submatrix, and o_nz
2631    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
2634    In this case, the values of d_nz,o_nz are:
2635 .vb
2636      proc0 : dnz = 2, o_nz = 2
2637      proc1 : dnz = 3, o_nz = 2
2638      proc2 : dnz = 1, o_nz = 4
2639 .ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.
2644 
   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2647    In the above case the values for d_nnz,o_nnz are:
2648 .vb
2649      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2650      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2651      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2652 .ve
2653    Here the space allocated is sum of all the above values i.e 34, and
2654    hence pre-allocation is perfect.
2655 
2656    Level: intermediate
2657 
2658 .keywords: matrix, aij, compressed row, sparse, parallel
2659 
2660 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2661           MPIAIJ
2662 @*/
2663 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2664 {
2665   PetscErrorCode ierr;
2666   PetscMPIInt    size;
2667 
2668   PetscFunctionBegin;
2669   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2670   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2671   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2672   if (size > 1) {
2673     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2674     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2675   } else {
2676     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2677     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2678   }
2679   PetscFunctionReturn(0);
2680 }
2681 
2682 #undef __FUNCT__
2683 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2684 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2685 {
2686   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2687 
2688   PetscFunctionBegin;
2689   *Ad     = a->A;
2690   *Ao     = a->B;
2691   *colmap = a->garray;
2692   PetscFunctionReturn(0);
2693 }
2694 
#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
/*
   MatSetColoring_MPIAIJ - Attaches a coloring to an MPIAIJ matrix by building
   per-block colorings for the diagonal (a->A) and off-diagonal (a->B) SeqAIJ
   blocks and delegating to MatSetColoring_SeqAIJ().

   Only IS_COLORING_LOCAL and IS_COLORING_GHOSTED coloring types are supported;
   any other type raises PETSC_ERR_SUP.
*/
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_LOCAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: gather the full global color
       array, then select the entries for the global columns (garray) that
       this process's off-diagonal block actually references */
    ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    /* colors[] is not freed here -- presumably ISColoringCreate takes
       ownership of the array; confirm against its man page */
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt             *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion: map the owned global columns
       (cstart .. cstart+a->A->n-1) to ghosted-local indices, then pick the
       corresponding colors from the ghosted coloring array */
    ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      larray[i] = i + a->cstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: garray already holds the global
       column numbers of the off-diagonal block; map them to ghosted-local
       indices and pick the colors */
    ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    /* NOTE(review): MPI_COMM_SELF is used here while PETSC_COMM_SELF is used
       above -- presumably equivalent for a serial coloring, but the
       inconsistency looks accidental; confirm and unify */
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
  }

  PetscFunctionReturn(0);
}
2758 
#if defined(PETSC_HAVE_ADIC)
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
/* Forwards ADIC-computed values to both the diagonal and the off-diagonal
   SeqAIJ blocks of the MPIAIJ matrix. */
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(mpiaij->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(mpiaij->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
2773 
2774 #undef __FUNCT__
2775 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2776 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2777 {
2778   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2779   PetscErrorCode ierr;
2780 
2781   PetscFunctionBegin;
2782   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2783   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2784   PetscFunctionReturn(0);
2785 }
2786 
2787 #undef __FUNCT__
2788 #define __FUNCT__ "MatMerge"
2789 /*@C
2790       MatMerge - Creates a single large PETSc matrix by concatinating sequential
2791                  matrices from each processor
2792 
2793     Collective on MPI_Comm
2794 
2795    Input Parameters:
2796 +    comm - the communicators the parallel matrix will live on
2797 .    inmat - the input sequential matrices
2798 .    n - number of local columns (or PETSC_DECIDE)
2799 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2800 
2801    Output Parameter:
2802 .    outmat - the parallel matrix generated
2803 
2804     Level: advanced
2805 
2806    Notes: The number of columns of the matrix in EACH processor MUST be the same.
2807 
2808 @*/
2809 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2810 {
2811   PetscErrorCode ierr;
2812   PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
2813   PetscInt       *indx;
2814   PetscScalar    *values;
2815   PetscMap       columnmap,rowmap;
2816 
2817   PetscFunctionBegin;
2818     ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2819   /*
2820   PetscMPIInt       rank;
2821   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2822   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N);
2823   */
2824   if (scall == MAT_INITIAL_MATRIX){
2825     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2826     if (n == PETSC_DECIDE){
2827       ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2828       ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
2829       ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2830       ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2831       ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2832     }
2833 
2834     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2835     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2836     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2837     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2838     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2839 
2840     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2841     for (i=0;i<m;i++) {
2842       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2843       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2844       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2845     }
2846     /* This routine will ONLY return MPIAIJ type matrix */
2847     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
2848     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2849     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2850     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2851     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2852 
2853   } else if (scall == MAT_REUSE_MATRIX){
2854     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2855   } else {
2856     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2857   }
2858 
2859   for (i=0;i<m;i++) {
2860     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2861     I    = i + rstart;
2862     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2863     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2864   }
2865   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2866   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2867   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2868 
2869   PetscFunctionReturn(0);
2870 }
2871 
2872 #undef __FUNCT__
2873 #define __FUNCT__ "MatFileSplit"
2874 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2875 {
2876   PetscErrorCode    ierr;
2877   PetscMPIInt       rank;
2878   PetscInt          m,N,i,rstart,nnz;
2879   size_t            len;
2880   const PetscInt    *indx;
2881   PetscViewer       out;
2882   char              *name;
2883   Mat               B;
2884   const PetscScalar *values;
2885 
2886   PetscFunctionBegin;
2887   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2888   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2889   /* Should this be the type of the diagonal block of A? */
2890   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2891   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
2892   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2893   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2894   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2895   for (i=0;i<m;i++) {
2896     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2897     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2898     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2899   }
2900   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2901   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2902 
2903   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2904   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2905   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2906   sprintf(name,"%s.%d",outfile,rank);
2907   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
2908   ierr = PetscFree(name);
2909   ierr = MatView(B,out);CHKERRQ(ierr);
2910   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2911   ierr = MatDestroy(B);CHKERRQ(ierr);
2912   PetscFunctionReturn(0);
2913 }
2914 
2915 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2916 #undef __FUNCT__
2917 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2918 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2919 {
2920   PetscErrorCode       ierr;
2921   Mat_Merge_SeqsToMPI  *merge;
2922   PetscObjectContainer container;
2923 
2924   PetscFunctionBegin;
2925   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2926   if (container) {
2927     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2928     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2929     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2930     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2931     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2932     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2933     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2934     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2935     ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2936     if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);}
2937     if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);}
2938     if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);}
2939 
2940     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2941     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2942   }
2943   ierr = PetscFree(merge);CHKERRQ(ierr);
2944 
2945   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2946   PetscFunctionReturn(0);
2947 }
2948 
2949 #include "src/mat/utils/freespace.h"
2950 #include "petscbt.h"
2951 static PetscEvent logkey_seqstompinum = 0;
2952 #undef __FUNCT__
2953 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
2954 /*@C
2955       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
2956                  matrices from each processor
2957 
2958     Collective on MPI_Comm
2959 
2960    Input Parameters:
2961 +    comm - the communicators the parallel matrix will live on
2962 .    seqmat - the input sequential matrices
2963 .    m - number of local rows (or PETSC_DECIDE)
2964 .    n - number of local columns (or PETSC_DECIDE)
2965 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2966 
2967    Output Parameter:
2968 .    mpimat - the parallel matrix generated
2969 
2970     Level: advanced
2971 
2972    Notes:
2973      The dimensions of the sequential matrix in each processor MUST be the same.
2974      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
2975      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
2976 @*/
2977 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2978 {
2979   PetscErrorCode       ierr;
2980   MPI_Comm             comm=mpimat->comm;
2981   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2982   PetscMPIInt          size,rank,taga,*len_s;
2983   PetscInt             N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
2984   PetscInt             proc,m;
2985   PetscInt             **buf_ri,**buf_rj;
2986   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2987   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2988   MPI_Request          *s_waits,*r_waits;
2989   MPI_Status           *status;
2990   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2991   Mat_Merge_SeqsToMPI  *merge;
2992   PetscObjectContainer container;
2993 
2994   PetscFunctionBegin;
2995   if (!logkey_seqstompinum) {
2996     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);
2997   }
2998   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2999 
3000   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3001   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3002 
3003   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3004   if (container) {
3005     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3006   }
3007   bi     = merge->bi;
3008   bj     = merge->bj;
3009   buf_ri = merge->buf_ri;
3010   buf_rj = merge->buf_rj;
3011 
3012   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3013   ierr   = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
3014   len_s  = merge->len_s;
3015 
3016   /* send and recv matrix values */
3017   /*-----------------------------*/
3018   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
3019   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3020 
3021   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3022   for (proc=0,k=0; proc<size; proc++){
3023     if (!len_s[proc]) continue;
3024     i = owners[proc];
3025     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3026     k++;
3027   }
3028 
3029   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3030   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3031   ierr = PetscFree(status);CHKERRQ(ierr);
3032 
3033   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3034   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3035 
3036   /* insert mat values of mpimat */
3037   /*----------------------------*/
3038   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3039   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3040   nextrow = buf_ri_k + merge->nrecv;
3041   nextai  = nextrow + merge->nrecv;
3042 
3043   for (k=0; k<merge->nrecv; k++){
3044     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3045     nrows = *(buf_ri_k[k]);
3046     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
3047     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
3048   }
3049 
3050   /* set values of ba */
3051   ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
3052   for (i=0; i<m; i++) {
3053     arow = owners[rank] + i;
3054     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3055     bnzi = bi[i+1] - bi[i];
3056     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3057 
3058     /* add local non-zero vals of this proc's seqmat into ba */
3059     anzi = ai[arow+1] - ai[arow];
3060     aj   = a->j + ai[arow];
3061     aa   = a->a + ai[arow];
3062     nextaj = 0;
3063     for (j=0; nextaj<anzi; j++){
3064       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3065         ba_i[j] += aa[nextaj++];
3066       }
3067     }
3068 
3069     /* add received vals into ba */
3070     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3071       /* i-th row */
3072       if (i == *nextrow[k]) {
3073         anzi = *(nextai[k]+1) - *nextai[k];
3074         aj   = buf_rj[k] + *(nextai[k]);
3075         aa   = abuf_r[k] + *(nextai[k]);
3076         nextaj = 0;
3077         for (j=0; nextaj<anzi; j++){
3078           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3079             ba_i[j] += aa[nextaj++];
3080           }
3081         }
3082         nextrow[k]++; nextai[k]++;
3083       }
3084     }
3085     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3086   }
3087   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3088   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3089 
3090   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3091   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3092   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3093   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3094   PetscFunctionReturn(0);
3095 }
3096 
3097 static PetscEvent logkey_seqstompisym = 0;
3098 #undef __FUNCT__
3099 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3100 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3101 {
3102   PetscErrorCode       ierr;
3103   Mat                  B_mpi;
3104   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3105   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3106   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3107   PetscInt             M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
3108   PetscInt             len,proc,*dnz,*onz;
3109   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3110   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3111   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3112   MPI_Status           *status;
3113   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3114   PetscBT              lnkbt;
3115   Mat_Merge_SeqsToMPI  *merge;
3116   PetscObjectContainer container;
3117 
3118   PetscFunctionBegin;
3119   if (!logkey_seqstompisym) {
3120     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);
3121   }
3122   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3123 
3124   /* make sure it is a PETSc comm */
3125   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3126   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3127   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3128 
3129   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3130   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3131 
3132   /* determine row ownership */
3133   /*---------------------------------------------------------*/
3134   ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
3135   if (m == PETSC_DECIDE) {
3136     ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
3137   } else {
3138     ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
3139   }
3140   ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
3141   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3142   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3143 
3144   if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); }
3145   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
3146 
3147   /* determine the number of messages to send, their lengths */
3148   /*---------------------------------------------------------*/
3149   len_s  = merge->len_s;
3150 
3151   len = 0;  /* length of buf_si[] */
3152   merge->nsend = 0;
3153   for (proc=0; proc<size; proc++){
3154     len_si[proc] = 0;
3155     if (proc == rank){
3156       len_s[proc] = 0;
3157     } else {
3158       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3159       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3160     }
3161     if (len_s[proc]) {
3162       merge->nsend++;
3163       nrows = 0;
3164       for (i=owners[proc]; i<owners[proc+1]; i++){
3165         if (ai[i+1] > ai[i]) nrows++;
3166       }
3167       len_si[proc] = 2*(nrows+1);
3168       len += len_si[proc];
3169     }
3170   }
3171 
3172   /* determine the number and length of messages to receive for ij-structure */
3173   /*-------------------------------------------------------------------------*/
3174   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3175   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3176 
3177   /* post the Irecv of j-structure */
3178   /*-------------------------------*/
3179   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
3180   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3181 
3182   /* post the Isend of j-structure */
3183   /*--------------------------------*/
3184   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3185   sj_waits = si_waits + merge->nsend;
3186 
3187   for (proc=0, k=0; proc<size; proc++){
3188     if (!len_s[proc]) continue;
3189     i = owners[proc];
3190     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3191     k++;
3192   }
3193 
3194   /* receives and sends of j-structure are complete */
3195   /*------------------------------------------------*/
3196   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3197   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3198 
3199   /* send and recv i-structure */
3200   /*---------------------------*/
3201   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
3202   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3203 
3204   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3205   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3206   for (proc=0,k=0; proc<size; proc++){
3207     if (!len_s[proc]) continue;
3208     /* form outgoing message for i-structure:
3209          buf_si[0]:                 nrows to be sent
3210                [1:nrows]:           row index (global)
3211                [nrows+1:2*nrows+1]: i-structure index
3212     */
3213     /*-------------------------------------------*/
3214     nrows = len_si[proc]/2 - 1;
3215     buf_si_i    = buf_si + nrows+1;
3216     buf_si[0]   = nrows;
3217     buf_si_i[0] = 0;
3218     nrows = 0;
3219     for (i=owners[proc]; i<owners[proc+1]; i++){
3220       anzi = ai[i+1] - ai[i];
3221       if (anzi) {
3222         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3223         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3224         nrows++;
3225       }
3226     }
3227     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3228     k++;
3229     buf_si += len_si[proc];
3230   }
3231 
3232   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3233   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3234 
3235   ierr = PetscVerboseInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv));CHKERRQ(ierr);
3236   for (i=0; i<merge->nrecv; i++){
3237     ierr = PetscVerboseInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI:   recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));CHKERRQ(ierr);
3238   }
3239 
3240   ierr = PetscFree(len_si);CHKERRQ(ierr);
3241   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3242   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3243   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3244   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3245   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3246   ierr = PetscFree(status);CHKERRQ(ierr);
3247 
3248   /* compute a local seq matrix in each processor */
3249   /*----------------------------------------------*/
3250   /* allocate bi array and free space for accumulating nonzero column info */
3251   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3252   bi[0] = 0;
3253 
3254   /* create and initialize a linked list */
3255   nlnk = N+1;
3256   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3257 
3258   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3259   len = 0;
3260   len  = ai[owners[rank+1]] - ai[owners[rank]];
3261   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3262   current_space = free_space;
3263 
3264   /* determine symbolic info for each local row */
3265   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3266   nextrow = buf_ri_k + merge->nrecv;
3267   nextai  = nextrow + merge->nrecv;
3268   for (k=0; k<merge->nrecv; k++){
3269     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3270     nrows = *buf_ri_k[k];
3271     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
3272     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
3273   }
3274 
3275   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3276   len = 0;
3277   for (i=0;i<m;i++) {
3278     bnzi   = 0;
3279     /* add local non-zero cols of this proc's seqmat into lnk */
3280     arow   = owners[rank] + i;
3281     anzi   = ai[arow+1] - ai[arow];
3282     aj     = a->j + ai[arow];
3283     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3284     bnzi += nlnk;
3285     /* add received col data into lnk */
3286     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3287       if (i == *nextrow[k]) { /* i-th row */
3288         anzi = *(nextai[k]+1) - *nextai[k];
3289         aj   = buf_rj[k] + *nextai[k];
3290         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3291         bnzi += nlnk;
3292         nextrow[k]++; nextai[k]++;
3293       }
3294     }
3295     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3296 
3297     /* if free space is not available, make more free space */
3298     if (current_space->local_remaining<bnzi) {
3299       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3300       nspacedouble++;
3301     }
3302     /* copy data into free space, then initialize lnk */
3303     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3304     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3305 
3306     current_space->array           += bnzi;
3307     current_space->local_used      += bnzi;
3308     current_space->local_remaining -= bnzi;
3309 
3310     bi[i+1] = bi[i] + bnzi;
3311   }
3312 
3313   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3314 
3315   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3316   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3317   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3318 
3319   /* create symbolic parallel matrix B_mpi */
3320   /*---------------------------------------*/
3321   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3322   if (n==PETSC_DECIDE) {
3323     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3324   } else {
3325     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3326   }
3327   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3328   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3329   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3330 
3331   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3332   B_mpi->assembled     = PETSC_FALSE;
3333   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3334   merge->bi            = bi;
3335   merge->bj            = bj;
3336   merge->buf_ri        = buf_ri;
3337   merge->buf_rj        = buf_rj;
3338   merge->coi           = PETSC_NULL;
3339   merge->coj           = PETSC_NULL;
3340   merge->owners_co     = PETSC_NULL;
3341 
3342   /* attach the supporting struct to B_mpi for reuse */
3343   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3344   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3345   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3346   *mpimat = B_mpi;
3347 
3348   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3349   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3350   PetscFunctionReturn(0);
3351 }
3352 
3353 static PetscEvent logkey_seqstompi = 0;
3354 #undef __FUNCT__
3355 #define __FUNCT__ "MatMerge_SeqsToMPI"
3356 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3357 {
3358   PetscErrorCode   ierr;
3359 
3360   PetscFunctionBegin;
3361   if (!logkey_seqstompi) {
3362     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);
3363   }
3364   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3365   if (scall == MAT_INITIAL_MATRIX){
3366     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3367   }
3368   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3369   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3370   PetscFunctionReturn(0);
3371 }
3372 static PetscEvent logkey_getlocalmat = 0;
3373 #undef __FUNCT__
3374 #define __FUNCT__ "MatGetLocalMat"
3375 /*@C
3376      MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows
3377 
3378     Not Collective
3379 
3380    Input Parameters:
3381 +    A - the matrix
3382 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3383 
3384    Output Parameter:
3385 .    A_loc - the local sequential matrix generated
3386 
3387     Level: developer
3388 
3389 @*/
3390 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3391 {
3392   PetscErrorCode  ierr;
3393   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3394   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3395   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3396   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3397   PetscInt        am=A->m,i,j,k,cstart=mpimat->cstart;
3398   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3399 
3400   PetscFunctionBegin;
3401   if (!logkey_getlocalmat) {
3402     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);
3403   }
3404   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3405   if (scall == MAT_INITIAL_MATRIX){
3406     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3407     ci[0] = 0;
3408     for (i=0; i<am; i++){
3409       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3410     }
3411     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3412     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3413     k = 0;
3414     for (i=0; i<am; i++) {
3415       ncols_o = bi[i+1] - bi[i];
3416       ncols_d = ai[i+1] - ai[i];
3417       /* off-diagonal portion of A */
3418       for (jo=0; jo<ncols_o; jo++) {
3419         col = cmap[*bj];
3420         if (col >= cstart) break;
3421         cj[k]   = col; bj++;
3422         ca[k++] = *ba++;
3423       }
3424       /* diagonal portion of A */
3425       for (j=0; j<ncols_d; j++) {
3426         cj[k]   = cstart + *aj++;
3427         ca[k++] = *aa++;
3428       }
3429       /* off-diagonal portion of A */
3430       for (j=jo; j<ncols_o; j++) {
3431         cj[k]   = cmap[*bj++];
3432         ca[k++] = *ba++;
3433       }
3434     }
3435     /* put together the new matrix */
3436     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3437     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3438     /* Since these are PETSc arrays, change flags to free them as necessary. */
3439     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3440     mat->freedata = PETSC_TRUE;
3441     mat->nonew    = 0;
3442   } else if (scall == MAT_REUSE_MATRIX){
3443     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3444     ci = mat->i; cj = mat->j; ca = mat->a;
3445     for (i=0; i<am; i++) {
3446       /* off-diagonal portion of A */
3447       ncols_o = bi[i+1] - bi[i];
3448       for (jo=0; jo<ncols_o; jo++) {
3449         col = cmap[*bj];
3450         if (col >= cstart) break;
3451         *ca++ = *ba++; bj++;
3452       }
3453       /* diagonal portion of A */
3454       ncols_d = ai[i+1] - ai[i];
3455       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3456       /* off-diagonal portion of A */
3457       for (j=jo; j<ncols_o; j++) {
3458         *ca++ = *ba++; bj++;
3459       }
3460     }
3461   } else {
3462     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3463   }
3464 
3465   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3466   PetscFunctionReturn(0);
3467 }
3468 
3469 static PetscEvent logkey_getlocalmatcondensed = 0;
3470 #undef __FUNCT__
3471 #define __FUNCT__ "MatGetLocalMatCondensed"
3472 /*@C
3473      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
3474 
3475     Not Collective
3476 
3477    Input Parameters:
3478 +    A - the matrix
3479 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3480 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3481 
3482    Output Parameter:
3483 .    A_loc - the local sequential matrix generated
3484 
3485     Level: developer
3486 
3487 @*/
3488 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3489 {
3490   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3491   PetscErrorCode    ierr;
3492   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3493   IS                isrowa,iscola;
3494   Mat               *aloc;
3495 
3496   PetscFunctionBegin;
3497   if (!logkey_getlocalmatcondensed) {
3498     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);
3499   }
3500   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3501   if (!row){
3502     start = a->rstart; end = a->rend;
3503     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3504   } else {
3505     isrowa = *row;
3506   }
3507   if (!col){
3508     start = a->cstart;
3509     cmap  = a->garray;
3510     nzA   = a->A->n;
3511     nzB   = a->B->n;
3512     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3513     ncols = 0;
3514     for (i=0; i<nzB; i++) {
3515       if (cmap[i] < start) idx[ncols++] = cmap[i];
3516       else break;
3517     }
3518     imark = i;
3519     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3520     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3521     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3522     ierr = PetscFree(idx);CHKERRQ(ierr);
3523   } else {
3524     iscola = *col;
3525   }
3526   if (scall != MAT_INITIAL_MATRIX){
3527     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3528     aloc[0] = *A_loc;
3529   }
3530   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3531   *A_loc = aloc[0];
3532   ierr = PetscFree(aloc);CHKERRQ(ierr);
3533   if (!row){
3534     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3535   }
3536   if (!col){
3537     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3538   }
3539   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3540   PetscFunctionReturn(0);
3541 }
3542 
3543 static PetscEvent logkey_GetBrowsOfAcols = 0;
3544 #undef __FUNCT__
3545 #define __FUNCT__ "MatGetBrowsOfAcols"
3546 /*@C
3547     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
3548 
3549     Collective on Mat
3550 
3551    Input Parameters:
3552 +    A,B - the matrices in mpiaij format
3553 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3554 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3555 
3556    Output Parameter:
3557 +    rowb, colb - index sets of rows and columns of B to extract
3558 .    brstart - row index of B_seq from which next B->m rows are taken from B's local rows
3559 -    B_seq - the sequential matrix generated
3560 
3561     Level: developer
3562 
3563 @*/
3564 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3565 {
3566   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3567   PetscErrorCode    ierr;
3568   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3569   IS                isrowb,iscolb;
3570   Mat               *bseq;
3571 
3572   PetscFunctionBegin;
3573   if (a->cstart != b->rstart || a->cend != b->rend){
3574     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3575   }
3576   if (!logkey_GetBrowsOfAcols) {
3577     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);
3578   }
3579   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3580 
3581   if (scall == MAT_INITIAL_MATRIX){
3582     start = a->cstart;
3583     cmap  = a->garray;
3584     nzA   = a->A->n;
3585     nzB   = a->B->n;
3586     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3587     ncols = 0;
3588     for (i=0; i<nzB; i++) {  /* row < local row index */
3589       if (cmap[i] < start) idx[ncols++] = cmap[i];
3590       else break;
3591     }
3592     imark = i;
3593     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3594     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3595     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3596     ierr = PetscFree(idx);CHKERRQ(ierr);
3597     *brstart = imark;
3598     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3599   } else {
3600     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3601     isrowb = *rowb; iscolb = *colb;
3602     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3603     bseq[0] = *B_seq;
3604   }
3605   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3606   *B_seq = bseq[0];
3607   ierr = PetscFree(bseq);CHKERRQ(ierr);
3608   if (!rowb){
3609     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3610   } else {
3611     *rowb = isrowb;
3612   }
3613   if (!colb){
3614     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3615   } else {
3616     *colb = iscolb;
3617   }
3618   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3619   PetscFunctionReturn(0);
3620 }
3621 
3622 static PetscEvent logkey_GetBrowsOfAocols = 0;
3623 #undef __FUNCT__
3624 #define __FUNCT__ "MatGetBrowsOfAoCols"
3625 /*@C
3626     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
3627     of the OFF-DIAGONAL portion of local A
3628 
3629     Collective on Mat
3630 
3631    Input Parameters:
3632 +    A,B - the matrices in mpiaij format
3633 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3634 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3635 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3636 
3637    Output Parameter:
3638 +    B_oth - the sequential matrix generated
3639 
3640     Level: developer
3641 
3642 @*/
3643 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3644 {
3645   VecScatter_MPI_General *gen_to,*gen_from;
3646   PetscErrorCode         ierr;
3647   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3648   Mat_SeqAIJ             *b_oth;
3649   VecScatter             ctx=a->Mvctx;
3650   MPI_Comm               comm=ctx->comm;
3651   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3652   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
3653   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3654   PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3655   MPI_Request            *rwaits,*swaits;
3656   MPI_Status             *sstatus,rstatus;
3657   PetscInt               *cols;
3658   PetscScalar            *vals;
3659   PetscMPIInt            j;
3660 
3661   PetscFunctionBegin;
3662   if (a->cstart != b->rstart || a->cend != b->rend){
3663     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
3664   }
3665   if (!logkey_GetBrowsOfAocols) {
3666     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);
3667   }
3668   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3669   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3670 
3671   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3672   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3673   rvalues  = gen_from->values; /* holds the length of sending row */
3674   svalues  = gen_to->values;   /* holds the length of receiving row */
3675   nrecvs   = gen_from->n;
3676   nsends   = gen_to->n;
3677   rwaits   = gen_from->requests;
3678   swaits   = gen_to->requests;
3679   srow     = gen_to->indices;   /* local row index to be sent */
3680   rstarts  = gen_from->starts;
3681   sstarts  = gen_to->starts;
3682   rprocs   = gen_from->procs;
3683   sprocs   = gen_to->procs;
3684   sstatus  = gen_to->sstatus;
3685 
3686   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3687   if (scall == MAT_INITIAL_MATRIX){
3688     /* i-array */
3689     /*---------*/
3690     /*  post receives */
3691     for (i=0; i<nrecvs; i++){
3692       rowlen = (PetscInt*)rvalues + rstarts[i];
3693       nrows = rstarts[i+1]-rstarts[i];
3694       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3695     }
3696 
3697     /* pack the outgoing message */
3698     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3699     rstartsj = sstartsj + nsends +1;
3700     sstartsj[0] = 0;  rstartsj[0] = 0;
3701     len = 0; /* total length of j or a array to be sent */
3702     k = 0;
3703     for (i=0; i<nsends; i++){
3704       rowlen = (PetscInt*)svalues + sstarts[i];
3705       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3706       for (j=0; j<nrows; j++) {
3707         row = srow[k] + b->rowners[rank]; /* global row idx */
3708         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3709         len += rowlen[j];
3710         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3711         k++;
3712       }
3713       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3714        sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3715     }
3716     /* recvs and sends of i-array are completed */
3717     i = nrecvs;
3718     while (i--) {
3719       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3720     }
3721     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3722     /* allocate buffers for sending j and a arrays */
3723     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3724     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3725 
3726     /* create i-array of B_oth */
3727     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3728     b_othi[0] = 0;
3729     len = 0; /* total length of j or a array to be received */
3730     k = 0;
3731     for (i=0; i<nrecvs; i++){
3732       rowlen = (PetscInt*)rvalues + rstarts[i];
3733       nrows = rstarts[i+1]-rstarts[i];
3734       for (j=0; j<nrows; j++) {
3735         b_othi[k+1] = b_othi[k] + rowlen[j];
3736         len += rowlen[j]; k++;
3737       }
3738       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3739     }
3740 
3741     /* allocate space for j and a arrrays of B_oth */
3742     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3743     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3744 
3745     /* j-array */
3746     /*---------*/
3747     /*  post receives of j-array */
3748     for (i=0; i<nrecvs; i++){
3749       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3750       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3751     }
3752     k = 0;
3753     for (i=0; i<nsends; i++){
3754       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3755       bufJ = bufj+sstartsj[i];
3756       for (j=0; j<nrows; j++) {
3757         row  = srow[k++] + b->rowners[rank]; /* global row idx */
3758         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3759         for (l=0; l<ncols; l++){
3760           *bufJ++ = cols[l];
3761         }
3762         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3763       }
3764       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3765     }
3766 
3767     /* recvs and sends of j-array are completed */
3768     i = nrecvs;
3769     while (i--) {
3770       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3771     }
3772     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3773   } else if (scall == MAT_REUSE_MATRIX){
3774     sstartsj = *startsj;
3775     rstartsj = sstartsj + nsends +1;
3776     bufa     = *bufa_ptr;
3777     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3778     b_otha   = b_oth->a;
3779   } else {
3780     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
3781   }
3782 
3783   /* a-array */
3784   /*---------*/
3785   /*  post receives of a-array */
3786   for (i=0; i<nrecvs; i++){
3787     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3788     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3789   }
3790   k = 0;
3791   for (i=0; i<nsends; i++){
3792     nrows = sstarts[i+1]-sstarts[i];
3793     bufA = bufa+sstartsj[i];
3794     for (j=0; j<nrows; j++) {
3795       row  = srow[k++] + b->rowners[rank]; /* global row idx */
3796       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3797       for (l=0; l<ncols; l++){
3798         *bufA++ = vals[l];
3799       }
3800       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3801 
3802     }
3803     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3804   }
3805   /* recvs and sends of a-array are completed */
3806   i = nrecvs;
3807   while (i--) {
3808     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3809   }
3810    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3811 
3812   if (scall == MAT_INITIAL_MATRIX){
3813     /* put together the new matrix */
3814     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3815 
3816     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3817     /* Since these are PETSc arrays, change flags to free them as necessary. */
3818     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3819     b_oth->freedata = PETSC_TRUE;
3820     b_oth->nonew    = 0;
3821 
3822     ierr = PetscFree(bufj);CHKERRQ(ierr);
3823     if (!startsj || !bufa_ptr){
3824       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3825       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
3826     } else {
3827       *startsj  = sstartsj;
3828       *bufa_ptr = bufa;
3829     }
3830   }
3831   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3832 
3833   PetscFunctionReturn(0);
3834 }
3835 
3836 #undef __FUNCT__
3837 #define __FUNCT__ "MatGetCommunicationStructs"
3838 /*@C
3839   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
3840 
3841   Not Collective
3842 
3843   Input Parameters:
3844 . A - The matrix in mpiaij format
3845 
3846   Output Parameter:
3847 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
3848 . colmap - A map from global column index to local index into lvec
3849 - multScatter - A scatter from the argument of a matrix-vector product to lvec
3850 
3851   Level: developer
3852 
3853 @*/
3854 #if defined (PETSC_USE_CTABLE)
3855 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
3856 #else
3857 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
3858 #endif
3859 {
3860   Mat_MPIAIJ *a;
3861 
3862   PetscFunctionBegin;
3863   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
3864   PetscValidPointer(lvec, 2)
3865   PetscValidPointer(colmap, 3)
3866   PetscValidPointer(multScatter, 4)
3867   a = (Mat_MPIAIJ *) A->data;
3868   if (lvec) *lvec = a->lvec;
3869   if (colmap) *colmap = a->colmap;
3870   if (multScatter) *multScatter = a->Mvctx;
3871   PetscFunctionReturn(0);
3872 }
3873 
3874 /*MC
3875    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3876 
3877    Options Database Keys:
3878 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3879 
3880   Level: beginner
3881 
3882 .seealso: MatCreateMPIAIJ
3883 M*/
3884 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
/*
   MatCreate_MPIAIJ - Constructor registered for the "mpiaij" matrix type.

   Fills in the operations table, splits row/column ownership across the
   communicator, builds the per-rank ownership tables, and creates the two
   sequential AIJ matrices (b->A and b->B) that hold the local data.
   Collective on B's communicator (uses MPI_Allgather).
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscInt       i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data         = (void*)b;
  ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor       = 0;
  B->bs           = 1;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode      = NOT_SET_VALUES;
  b->size            = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  /* fill in any of m/M (rows) and n/N (columns) the caller left as PETSC_DECIDE */
  ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  /* one allocation holds both tables: rowners first, cowners right after it */
  ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  b->cowners = b->rowners + b->size + 2;
  /* gather each rank's local row count into rowners[1..size], then prefix-sum
     into ownership offsets; rowners[1] is already cumulative since rowners[0]=0 */
  ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  /* same construction for the column ownership table */
  ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;      /* global-to-local column map; built lazily by CreateColmap_MPIAIJ_Private() */
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec      = PETSC_NULL;
  b->Mvctx     = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* Explicitly create 2 MATSEQAIJ matrices. */
  /* b->A: diagonal block (m x n); b->B: off-diagonal block, kept at the full
     global column width (m x N) until assembly compacts it -- TODO confirm */
  ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
  ierr = MatSetSizes(b->A,B->m,B->n,B->m,B->n);CHKERRQ(ierr);
  ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  ierr = MatSetSizes(b->B,B->m,B->N,B->m,B->N);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);

  /* register type-specific implementations queried by name elsewhere */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
				     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
				     "MatIsTranspose_MPIAIJ",
				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
				     "MatMPIAIJSetPreallocation_MPIAIJ",
				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
				     "MatDiagonalScaleLocal_MPIAIJ",
				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
3987 
/*
    Special version for direct calls from Fortran
*/
/* map the C symbol to the Fortran-mangled name expected by the linker */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Redefine the error-checking macros so they can be used inside a void
   function: no error code can be returned, so abort on the communicator
   instead of returning. */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
#undef SETERRQ2
#define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
#undef SETERRQ
#define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)
4004 
4005 EXTERN_C_BEGIN
4006 #undef __FUNCT__
4007 #define __FUNCT__ "matsetvaluesmpiaij_"
4008 void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv)
4009 {
4010   Mat            mat = *mmat;
4011   PetscInt       m = *mm, n = *mn;
4012   InsertMode     addv = *maddv;
4013   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
4014   PetscScalar    value;
4015   PetscErrorCode ierr;
4016   MatPreallocated(mat);
4017   if (mat->insertmode == NOT_SET_VALUES) {
4018     mat->insertmode = addv;
4019   }
4020 #if defined(PETSC_USE_DEBUG)
4021   else if (mat->insertmode != addv) {
4022     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4023   }
4024 #endif
4025   {
4026   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
4027   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
4028   PetscTruth     roworiented = aij->roworiented;
4029 
4030   /* Some Variables required in the macro */
4031   Mat            A = aij->A;
4032   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
4033   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
4034   PetscScalar    *aa = a->a;
4035   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
4036   Mat            B = aij->B;
4037   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
4038   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
4039   PetscScalar    *ba = b->a;
4040 
4041   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4042   PetscInt       nonew = a->nonew;
4043   PetscScalar    *ap1,*ap2;
4044 
4045   PetscFunctionBegin;
4046   for (i=0; i<m; i++) {
4047     if (im[i] < 0) continue;
4048 #if defined(PETSC_USE_DEBUG)
4049     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
4050 #endif
4051     if (im[i] >= rstart && im[i] < rend) {
4052       row      = im[i] - rstart;
4053       lastcol1 = -1;
4054       rp1      = aj + ai[row];
4055       ap1      = aa + ai[row];
4056       rmax1    = aimax[row];
4057       nrow1    = ailen[row];
4058       low1     = 0;
4059       high1    = nrow1;
4060       lastcol2 = -1;
4061       rp2      = bj + bi[row];
4062       ap2      = ba + bi[row];
4063       rmax2    = bimax[row];
4064       nrow2    = bilen[row];
4065       low2     = 0;
4066       high2    = nrow2;
4067 
4068       for (j=0; j<n; j++) {
4069         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4070         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4071         if (in[j] >= cstart && in[j] < cend){
4072           col = in[j] - cstart;
4073           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4074         } else if (in[j] < 0) continue;
4075 #if defined(PETSC_USE_DEBUG)
4076         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
4077 #endif
4078         else {
4079           if (mat->was_assembled) {
4080             if (!aij->colmap) {
4081               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
4082             }
4083 #if defined (PETSC_USE_CTABLE)
4084             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4085 	    col--;
4086 #else
4087             col = aij->colmap[in[j]] - 1;
4088 #endif
4089             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4090               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
4091               col =  in[j];
4092               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4093               B = aij->B;
4094               b = (Mat_SeqAIJ*)B->data;
4095               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4096               rp2      = bj + bi[row];
4097               ap2      = ba + bi[row];
4098               rmax2    = bimax[row];
4099               nrow2    = bilen[row];
4100               low2     = 0;
4101               high2    = nrow2;
4102               bm       = aij->B->m;
4103               ba = b->a;
4104             }
4105           } else col = in[j];
4106           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4107         }
4108       }
4109     } else {
4110       if (!aij->donotstash) {
4111         if (roworiented) {
4112           if (ignorezeroentries && v[i*n] == 0.0) continue;
4113           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
4114         } else {
4115           if (ignorezeroentries && v[i] == 0.0) continue;
4116           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4117         }
4118       }
4119     }
4120   }}
4121   PetscFunctionReturnVoid();
4122 }
4123 EXTERN_C_END
4124