xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 1d2e40055fe3bf783df36eeeccf2d7d5fcfab0fd)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
10 a slightly higher hash table cost; without it it is not scalable (each processor
11 has an order N integer array but is fast to acess.
12 */
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->cmap.n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
35 
36 
37 #define CHUNKSIZE   15
38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
39 { \
40     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
41     lastcol1 = col;\
42     while (high1-low1 > 5) { \
43       t = (low1+high1)/2; \
44       if (rp1[t] > col) high1 = t; \
45       else             low1  = t; \
46     } \
47       for (_i=low1; _i<high1; _i++) { \
48         if (rp1[_i] > col) break; \
49         if (rp1[_i] == col) { \
50           if (addv == ADD_VALUES) ap1[_i] += value;   \
51           else                    ap1[_i] = value; \
52           goto a_noinsert; \
53         } \
54       }  \
55       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
56       if (nonew == 1) goto a_noinsert; \
57       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58       MatSeqXAIJReallocateAIJ(a,1,nrow1,row,col,rmax1,aa,ai,aj,am,rp1,ap1,aimax,nonew); \
59       N = nrow1++ - 1; a->nz++; high1++; \
60       /* shift up all the later entries in this row */ \
61       for (ii=N; ii>=_i; ii--) { \
62         rp1[ii+1] = rp1[ii]; \
63         ap1[ii+1] = ap1[ii]; \
64       } \
65       rp1[_i] = col;  \
66       ap1[_i] = value;  \
67       a_noinsert: ; \
68       ailen[row] = nrow1; \
69 }
70 
71 
72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73 { \
74     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
75     lastcol2 = col;\
76     while (high2-low2 > 5) { \
77       t = (low2+high2)/2; \
78       if (rp2[t] > col) high2 = t; \
79       else             low2  = t; \
80     } \
81        for (_i=low2; _i<high2; _i++) { \
82         if (rp2[_i] > col) break; \
83         if (rp2[_i] == col) { \
84           if (addv == ADD_VALUES) ap2[_i] += value;   \
85           else                    ap2[_i] = value; \
86           goto b_noinsert; \
87         } \
88       }  \
89       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
90       if (nonew == 1) goto b_noinsert; \
91       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92       MatSeqXAIJReallocateAIJ(b,1,nrow2,row,col,rmax2,ba,bi,bj,bm,rp2,ap2,bimax,nonew); \
93       N = nrow2++ - 1; b->nz++; high2++;\
94       /* shift up all the later entries in this row */ \
95       for (ii=N; ii>=_i; ii--) { \
96         rp2[ii+1] = rp2[ii]; \
97         ap2[ii+1] = ap2[ii]; \
98       } \
99       rp2[_i] = col;  \
100       ap2[_i] = value;  \
101       b_noinsert: ; \
102       bilen[row] = nrow2; \
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
108 {
109   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
110   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
111   PetscErrorCode ierr;
112   PetscInt       l,*garray = mat->garray,diag;
113 
114   PetscFunctionBegin;
115   /* code only works for square matrices A */
116 
117   /* find size of row to the left of the diagonal part */
118   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
119   row  = row - diag;
120   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
121     if (garray[b->j[b->i[row]+l]] > diag) break;
122   }
123   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
124 
125   /* diagonal part */
126   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
127 
128   /* right of diagonal part */
129   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
130   PetscFunctionReturn(0);
131 }
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatSetValues_MPIAIJ"
135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
136 {
137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
138   PetscScalar    value;
139   PetscErrorCode ierr;
140   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
141   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
142   PetscTruth     roworiented = aij->roworiented;
143 
144   /* Some Variables required in the macro */
145   Mat            A = aij->A;
146   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
147   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
148   PetscScalar    *aa = a->a;
149   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
150   Mat            B = aij->B;
151   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
152   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
153   PetscScalar    *ba = b->a;
154 
155   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
156   PetscInt       nonew = a->nonew;
157   PetscScalar    *ap1,*ap2;
158 
159   PetscFunctionBegin;
160   for (i=0; i<m; i++) {
161     if (im[i] < 0) continue;
162 #if defined(PETSC_USE_DEBUG)
163     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
164 #endif
165     if (im[i] >= rstart && im[i] < rend) {
166       row      = im[i] - rstart;
167       lastcol1 = -1;
168       rp1      = aj + ai[row];
169       ap1      = aa + ai[row];
170       rmax1    = aimax[row];
171       nrow1    = ailen[row];
172       low1     = 0;
173       high1    = nrow1;
174       lastcol2 = -1;
175       rp2      = bj + bi[row];
176       ap2      = ba + bi[row];
177       rmax2    = bimax[row];
178       nrow2    = bilen[row];
179       low2     = 0;
180       high2    = nrow2;
181 
182       for (j=0; j<n; j++) {
183         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
184         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
185         if (in[j] >= cstart && in[j] < cend){
186           col = in[j] - cstart;
187           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
188         } else if (in[j] < 0) continue;
189 #if defined(PETSC_USE_DEBUG)
190         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
191 #endif
192         else {
193           if (mat->was_assembled) {
194             if (!aij->colmap) {
195               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
196             }
197 #if defined (PETSC_USE_CTABLE)
198             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
199 	    col--;
200 #else
201             col = aij->colmap[in[j]] - 1;
202 #endif
203             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
204               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
205               col =  in[j];
206               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
207               B = aij->B;
208               b = (Mat_SeqAIJ*)B->data;
209               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
210               rp2      = bj + bi[row];
211               ap2      = ba + bi[row];
212               rmax2    = bimax[row];
213               nrow2    = bilen[row];
214               low2     = 0;
215               high2    = nrow2;
216               bm       = aij->B->rmap.n;
217               ba = b->a;
218             }
219           } else col = in[j];
220           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
221         }
222       }
223     } else {
224       if (!aij->donotstash) {
225         if (roworiented) {
226           if (ignorezeroentries && v[i*n] == 0.0) continue;
227           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
228         } else {
229           if (ignorezeroentries && v[i] == 0.0) continue;
230           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
231         }
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
237 
238 
239 #undef __FUNCT__
240 #define __FUNCT__ "MatGetValues_MPIAIJ"
241 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
242 {
243   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
244   PetscErrorCode ierr;
245   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
246   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
247 
248   PetscFunctionBegin;
249   for (i=0; i<m; i++) {
250     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
251     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
252     if (idxm[i] >= rstart && idxm[i] < rend) {
253       row = idxm[i] - rstart;
254       for (j=0; j<n; j++) {
255         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
256         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
257         if (idxn[j] >= cstart && idxn[j] < cend){
258           col = idxn[j] - cstart;
259           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
260         } else {
261           if (!aij->colmap) {
262             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
263           }
264 #if defined (PETSC_USE_CTABLE)
265           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
266           col --;
267 #else
268           col = aij->colmap[idxn[j]] - 1;
269 #endif
270           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
271           else {
272             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
273           }
274         }
275       }
276     } else {
277       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
278     }
279   }
280   PetscFunctionReturn(0);
281 }
282 
283 #undef __FUNCT__
284 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
285 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
286 {
287   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
288   PetscErrorCode ierr;
289   PetscInt       nstash,reallocs;
290   InsertMode     addv;
291 
292   PetscFunctionBegin;
293   if (aij->donotstash) {
294     PetscFunctionReturn(0);
295   }
296 
297   /* make sure all processors are either in INSERTMODE or ADDMODE */
298   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
299   if (addv == (ADD_VALUES|INSERT_VALUES)) {
300     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
301   }
302   mat->insertmode = addv; /* in case this processor had no cache */
303 
304   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
305   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
306   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
307   PetscFunctionReturn(0);
308 }
309 
310 #undef __FUNCT__
311 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
312 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
313 {
314   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
315   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
316   PetscErrorCode ierr;
317   PetscMPIInt    n;
318   PetscInt       i,j,rstart,ncols,flg;
319   PetscInt       *row,*col,other_disassembled;
320   PetscScalar    *val;
321   InsertMode     addv = mat->insertmode;
322 
323   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
324   PetscFunctionBegin;
325   if (!aij->donotstash) {
326     while (1) {
327       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
328       if (!flg) break;
329 
330       for (i=0; i<n;) {
331         /* Now identify the consecutive vals belonging to the same row */
332         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
333         if (j < n) ncols = j-i;
334         else       ncols = n-i;
335         /* Now assemble all these values with a single function call */
336         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
337         i = j;
338       }
339     }
340     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
341   }
342   a->compressedrow.use     = PETSC_FALSE;
343   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
344   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
345 
346   /* determine if any processor has disassembled, if so we must
347      also disassemble ourselfs, in order that we may reassemble. */
348   /*
349      if nonzero structure of submatrix B cannot change then we know that
350      no processor disassembled thus we can skip this stuff
351   */
352   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
353     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
354     if (mat->was_assembled && !other_disassembled) {
355       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
356     }
357   }
358   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
359     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
360   }
361   ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
362   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
363   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
364   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
365 
366   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
367   aij->rowvalues = 0;
368 
369   /* used by MatAXPY() */
370   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
371   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
372 
373   PetscFunctionReturn(0);
374 }
375 
376 #undef __FUNCT__
377 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
378 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
379 {
380   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
381   PetscErrorCode ierr;
382 
383   PetscFunctionBegin;
384   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
385   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
386   PetscFunctionReturn(0);
387 }
388 
389 #undef __FUNCT__
390 #define __FUNCT__ "MatZeroRows_MPIAIJ"
391 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
392 {
393   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
394   PetscErrorCode ierr;
395   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
396   PetscInt       i,*owners = A->rmap.range;
397   PetscInt       *nprocs,j,idx,nsends,row;
398   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
399   PetscInt       *rvalues,count,base,slen,*source;
400   PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
401   MPI_Comm       comm = A->comm;
402   MPI_Request    *send_waits,*recv_waits;
403   MPI_Status     recv_status,*send_status;
404 #if defined(PETSC_DEBUG)
405   PetscTruth     found = PETSC_FALSE;
406 #endif
407 
408   PetscFunctionBegin;
409   /*  first count number of contributors to each processor */
410   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
411   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
412   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
413   j = 0;
414   for (i=0; i<N; i++) {
415     if (lastidx > (idx = rows[i])) j = 0;
416     lastidx = idx;
417     for (; j<size; j++) {
418       if (idx >= owners[j] && idx < owners[j+1]) {
419         nprocs[2*j]++;
420         nprocs[2*j+1] = 1;
421         owner[i] = j;
422 #if defined(PETSC_DEBUG)
423         found = PETSC_TRUE;
424 #endif
425         break;
426       }
427     }
428 #if defined(PETSC_DEBUG)
429     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
430     found = PETSC_FALSE;
431 #endif
432   }
433   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
434 
435   /* inform other processors of number of messages and max length*/
436   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
437 
438   /* post receives:   */
439   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
440   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
441   for (i=0; i<nrecvs; i++) {
442     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
443   }
444 
445   /* do sends:
446       1) starts[i] gives the starting index in svalues for stuff going to
447          the ith processor
448   */
449   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
450   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
451   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
452   starts[0] = 0;
453   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
454   for (i=0; i<N; i++) {
455     svalues[starts[owner[i]]++] = rows[i];
456   }
457 
458   starts[0] = 0;
459   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
460   count = 0;
461   for (i=0; i<size; i++) {
462     if (nprocs[2*i+1]) {
463       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
464     }
465   }
466   ierr = PetscFree(starts);CHKERRQ(ierr);
467 
468   base = owners[rank];
469 
470   /*  wait on receives */
471   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
472   source = lens + nrecvs;
473   count  = nrecvs; slen = 0;
474   while (count) {
475     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
476     /* unpack receives into our local space */
477     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
478     source[imdex]  = recv_status.MPI_SOURCE;
479     lens[imdex]    = n;
480     slen          += n;
481     count--;
482   }
483   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
484 
485   /* move the data into the send scatter */
486   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
487   count = 0;
488   for (i=0; i<nrecvs; i++) {
489     values = rvalues + i*nmax;
490     for (j=0; j<lens[i]; j++) {
491       lrows[count++] = values[j] - base;
492     }
493   }
494   ierr = PetscFree(rvalues);CHKERRQ(ierr);
495   ierr = PetscFree(lens);CHKERRQ(ierr);
496   ierr = PetscFree(owner);CHKERRQ(ierr);
497   ierr = PetscFree(nprocs);CHKERRQ(ierr);
498 
499   /* actually zap the local rows */
500   /*
501         Zero the required rows. If the "diagonal block" of the matrix
502      is square and the user wishes to set the diagonal we use separate
503      code so that MatSetValues() is not called for each diagonal allocating
504      new memory, thus calling lots of mallocs and slowing things down.
505 
506        Contributed by: Matthew Knepley
507   */
508   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
509   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
510   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
511     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
512   } else if (diag != 0.0) {
513     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
514     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
515       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
516 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
517     }
518     for (i = 0; i < slen; i++) {
519       row  = lrows[i] + rstart;
520       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
521     }
522     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
523     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
524   } else {
525     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
526   }
527   ierr = PetscFree(lrows);CHKERRQ(ierr);
528 
529   /* wait on sends */
530   if (nsends) {
531     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
532     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
533     ierr = PetscFree(send_status);CHKERRQ(ierr);
534   }
535   ierr = PetscFree(send_waits);CHKERRQ(ierr);
536   ierr = PetscFree(svalues);CHKERRQ(ierr);
537 
538   PetscFunctionReturn(0);
539 }
540 
541 #undef __FUNCT__
542 #define __FUNCT__ "MatMult_MPIAIJ"
543 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
544 {
545   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
546   PetscErrorCode ierr;
547   PetscInt       nt;
548 
549   PetscFunctionBegin;
550   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
551   if (nt != A->cmap.n) {
552     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
553   }
554   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
555   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
556   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
557   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
558   PetscFunctionReturn(0);
559 }
560 
561 #undef __FUNCT__
562 #define __FUNCT__ "MatMultAdd_MPIAIJ"
563 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
564 {
565   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
566   PetscErrorCode ierr;
567 
568   PetscFunctionBegin;
569   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
570   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
571   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
572   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
573   PetscFunctionReturn(0);
574 }
575 
576 #undef __FUNCT__
577 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
578 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
579 {
580   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
581   PetscErrorCode ierr;
582   PetscTruth     merged;
583 
584   PetscFunctionBegin;
585   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
586   /* do nondiagonal part */
587   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
588   if (!merged) {
589     /* send it on its way */
590     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
591     /* do local part */
592     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
593     /* receive remote parts: note this assumes the values are not actually */
594     /* added in yy until the next line, */
595     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
596   } else {
597     /* do local part */
598     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
599     /* send it on its way */
600     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
601     /* values actually were received in the Begin() but we need to call this nop */
602     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
603   }
604   PetscFunctionReturn(0);
605 }
606 
607 EXTERN_C_BEGIN
608 #undef __FUNCT__
609 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
610 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
611 {
612   MPI_Comm       comm;
613   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
614   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
615   IS             Me,Notme;
616   PetscErrorCode ierr;
617   PetscInt       M,N,first,last,*notme,i;
618   PetscMPIInt    size;
619 
620   PetscFunctionBegin;
621 
622   /* Easy test: symmetric diagonal block */
623   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
624   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
625   if (!*f) PetscFunctionReturn(0);
626   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
627   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
628   if (size == 1) PetscFunctionReturn(0);
629 
630   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
631   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
632   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
633   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
634   for (i=0; i<first; i++) notme[i] = i;
635   for (i=last; i<M; i++) notme[i-last+first] = i;
636   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
637   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
638   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
639   Aoff = Aoffs[0];
640   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
641   Boff = Boffs[0];
642   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
643   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
644   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
645   ierr = ISDestroy(Me);CHKERRQ(ierr);
646   ierr = ISDestroy(Notme);CHKERRQ(ierr);
647 
648   PetscFunctionReturn(0);
649 }
650 EXTERN_C_END
651 
652 #undef __FUNCT__
653 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
654 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
655 {
656   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
657   PetscErrorCode ierr;
658 
659   PetscFunctionBegin;
660   /* do nondiagonal part */
661   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
662   /* send it on its way */
663   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
664   /* do local part */
665   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
666   /* receive remote parts */
667   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
668   PetscFunctionReturn(0);
669 }
670 
671 /*
672   This only works correctly for square matrices where the subblock A->A is the
673    diagonal block
674 */
675 #undef __FUNCT__
676 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
677 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
678 {
679   PetscErrorCode ierr;
680   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
681 
682   PetscFunctionBegin;
683   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
684   if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
685     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
686   }
687   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
688   PetscFunctionReturn(0);
689 }
690 
691 #undef __FUNCT__
692 #define __FUNCT__ "MatScale_MPIAIJ"
693 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
694 {
695   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
696   PetscErrorCode ierr;
697 
698   PetscFunctionBegin;
699   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
700   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
701   PetscFunctionReturn(0);
702 }
703 
704 #undef __FUNCT__
705 #define __FUNCT__ "MatDestroy_MPIAIJ"
706 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
707 {
708   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
709   PetscErrorCode ierr;
710 
711   PetscFunctionBegin;
712 #if defined(PETSC_USE_LOG)
713   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
714 #endif
715   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
716   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
717   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
718 #if defined (PETSC_USE_CTABLE)
719   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
720 #else
721   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
722 #endif
723   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
724   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
725   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
726   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
727   ierr = PetscFree(aij);CHKERRQ(ierr);
728 
729   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
730   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
731   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
732   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
733   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
734   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
735   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
736   PetscFunctionReturn(0);
737 }
738 
739 #undef __FUNCT__
740 #define __FUNCT__ "MatView_MPIAIJ_Binary"
741 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
742 {
743   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
744   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
745   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
746   PetscErrorCode    ierr;
747   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
748   int               fd;
749   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
750   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
751   PetscScalar       *column_values;
752 
753   PetscFunctionBegin;
754   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
755   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
756   nz   = A->nz + B->nz;
757   if (!rank) {
758     header[0] = MAT_FILE_COOKIE;
759     header[1] = mat->rmap.N;
760     header[2] = mat->cmap.N;
761     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
762     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
763     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
764     /* get largest number of rows any processor has */
765     rlen = mat->rmap.n;
766     range = mat->rmap.range;
767     for (i=1; i<size; i++) {
768       rlen = PetscMax(rlen,range[i+1] - range[i]);
769     }
770   } else {
771     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
772     rlen = mat->rmap.n;
773   }
774 
775   /* load up the local row counts */
776   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
777   for (i=0; i<mat->rmap.n; i++) {
778     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
779   }
780 
781   /* store the row lengths to the file */
782   if (!rank) {
783     MPI_Status status;
784     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
785     for (i=1; i<size; i++) {
786       rlen = range[i+1] - range[i];
787       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
788       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
789     }
790   } else {
791     ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
792   }
793   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
794 
795   /* load up the local column indices */
796   nzmax = nz; /* )th processor needs space a largest processor needs */
797   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
798   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
799   cnt  = 0;
800   for (i=0; i<mat->rmap.n; i++) {
801     for (j=B->i[i]; j<B->i[i+1]; j++) {
802       if ( (col = garray[B->j[j]]) > cstart) break;
803       column_indices[cnt++] = col;
804     }
805     for (k=A->i[i]; k<A->i[i+1]; k++) {
806       column_indices[cnt++] = A->j[k] + cstart;
807     }
808     for (; j<B->i[i+1]; j++) {
809       column_indices[cnt++] = garray[B->j[j]];
810     }
811   }
812   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
813 
814   /* store the column indices to the file */
815   if (!rank) {
816     MPI_Status status;
817     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
818     for (i=1; i<size; i++) {
819       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
820       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
821       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
822       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
823     }
824   } else {
825     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
826     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
827   }
828   ierr = PetscFree(column_indices);CHKERRQ(ierr);
829 
830   /* load up the local column values */
831   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
832   cnt  = 0;
833   for (i=0; i<mat->rmap.n; i++) {
834     for (j=B->i[i]; j<B->i[i+1]; j++) {
835       if ( garray[B->j[j]] > cstart) break;
836       column_values[cnt++] = B->a[j];
837     }
838     for (k=A->i[i]; k<A->i[i+1]; k++) {
839       column_values[cnt++] = A->a[k];
840     }
841     for (; j<B->i[i+1]; j++) {
842       column_values[cnt++] = B->a[j];
843     }
844   }
845   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
846 
847   /* store the column values to the file */
848   if (!rank) {
849     MPI_Status status;
850     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
851     for (i=1; i<size; i++) {
852       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
853       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
854       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
855       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
856     }
857   } else {
858     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
859     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
860   }
861   ierr = PetscFree(column_values);CHKERRQ(ierr);
862   PetscFunctionReturn(0);
863 }
864 
865 #undef __FUNCT__
866 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
867 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
868 {
869   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
870   PetscErrorCode    ierr;
871   PetscMPIInt       rank = aij->rank,size = aij->size;
872   PetscTruth        isdraw,iascii,isbinary;
873   PetscViewer       sviewer;
874   PetscViewerFormat format;
875 
876   PetscFunctionBegin;
877   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
878   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
879   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
880   if (iascii) {
881     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
882     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
883       MatInfo    info;
884       PetscTruth inodes;
885 
886       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
887       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
888       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
889       if (!inodes) {
890         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
891 					      rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
892       } else {
893         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
894 		    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
895       }
896       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
897       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
898       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
899       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
900       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
901       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
902       PetscFunctionReturn(0);
903     } else if (format == PETSC_VIEWER_ASCII_INFO) {
904       PetscInt   inodecount,inodelimit,*inodes;
905       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
906       if (inodes) {
907         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
908       } else {
909         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
910       }
911       PetscFunctionReturn(0);
912     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
913       PetscFunctionReturn(0);
914     }
915   } else if (isbinary) {
916     if (size == 1) {
917       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
918       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
919     } else {
920       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
921     }
922     PetscFunctionReturn(0);
923   } else if (isdraw) {
924     PetscDraw  draw;
925     PetscTruth isnull;
926     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
927     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
928   }
929 
930   if (size == 1) {
931     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
932     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
933   } else {
934     /* assemble the entire matrix onto first processor. */
935     Mat         A;
936     Mat_SeqAIJ  *Aloc;
937     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
938     PetscScalar *a;
939 
940     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
941     if (!rank) {
942       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
943     } else {
944       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
945     }
946     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
947     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
948     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
949     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
950 
951     /* copy over the A part */
952     Aloc = (Mat_SeqAIJ*)aij->A->data;
953     m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
954     row = mat->rmap.rstart;
955     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
956     for (i=0; i<m; i++) {
957       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
958       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
959     }
960     aj = Aloc->j;
961     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}
962 
963     /* copy over the B part */
964     Aloc = (Mat_SeqAIJ*)aij->B->data;
965     m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
966     row  = mat->rmap.rstart;
967     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
968     ct   = cols;
969     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
970     for (i=0; i<m; i++) {
971       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
972       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
973     }
974     ierr = PetscFree(ct);CHKERRQ(ierr);
975     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
976     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977     /*
978        Everyone has to call to draw the matrix since the graphics waits are
979        synchronized across all processors that share the PetscDraw object
980     */
981     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
982     if (!rank) {
983       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
984       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
985     }
986     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
987     ierr = MatDestroy(A);CHKERRQ(ierr);
988   }
989   PetscFunctionReturn(0);
990 }
991 
992 #undef __FUNCT__
993 #define __FUNCT__ "MatView_MPIAIJ"
994 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
995 {
996   PetscErrorCode ierr;
997   PetscTruth     iascii,isdraw,issocket,isbinary;
998 
999   PetscFunctionBegin;
1000   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1001   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1002   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1003   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1004   if (iascii || isdraw || isbinary || issocket) {
1005     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1006   } else {
1007     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1008   }
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 
1013 
1014 #undef __FUNCT__
1015 #define __FUNCT__ "MatRelax_MPIAIJ"
1016 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1017 {
1018   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1019   PetscErrorCode ierr;
1020   Vec            bb1;
1021 
1022   PetscFunctionBegin;
1023   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1024 
1025   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1026 
1027   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1028     if (flag & SOR_ZERO_INITIAL_GUESS) {
1029       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1030       its--;
1031     }
1032 
1033     while (its--) {
1034       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1035       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1036 
1037       /* update rhs: bb1 = bb - B*x */
1038       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1039       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1040 
1041       /* local sweep */
1042       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1043       CHKERRQ(ierr);
1044     }
1045   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1046     if (flag & SOR_ZERO_INITIAL_GUESS) {
1047       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1048       its--;
1049     }
1050     while (its--) {
1051       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1052       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1053 
1054       /* update rhs: bb1 = bb - B*x */
1055       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1056       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1057 
1058       /* local sweep */
1059       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1060       CHKERRQ(ierr);
1061     }
1062   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1063     if (flag & SOR_ZERO_INITIAL_GUESS) {
1064       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1065       its--;
1066     }
1067     while (its--) {
1068       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1069       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1070 
1071       /* update rhs: bb1 = bb - B*x */
1072       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1073       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1074 
1075       /* local sweep */
1076       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1077       CHKERRQ(ierr);
1078     }
1079   } else {
1080     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1081   }
1082 
1083   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatPermute_MPIAIJ"
1089 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1090 {
1091   MPI_Comm       comm,pcomm;
1092   PetscInt       first,local_size,nrows,*rows;
1093   int            ntids;
1094   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1095   PetscErrorCode ierr;
1096 
1097   PetscFunctionBegin;
1098   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1099   /* make a collective version of 'rowp' */
1100   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1101   if (pcomm==comm) {
1102     crowp = rowp;
1103   } else {
1104     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1105     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1106     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1107     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1108   }
1109   /* collect the global row permutation and invert it */
1110   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1111   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1112   if (pcomm!=comm) {
1113     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1114   }
1115   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1116   /* get the local target indices */
1117   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1118   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1119   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1120   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1121   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1122   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1123   /* the column permutation is so much easier;
1124      make a local version of 'colp' and invert it */
1125   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1126   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1127   if (ntids==1) {
1128     lcolp = colp;
1129   } else {
1130     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1131     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1132     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1133   }
1134   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1135   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1136   if (ntids>1) {
1137     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1138     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1139   }
1140   /* now we just get the submatrix */
1141   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1142   /* clean up */
1143   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1144   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 #undef __FUNCT__
1149 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1150 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1151 {
1152   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1153   Mat            A = mat->A,B = mat->B;
1154   PetscErrorCode ierr;
1155   PetscReal      isend[5],irecv[5];
1156 
1157   PetscFunctionBegin;
1158   info->block_size     = 1.0;
1159   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1160   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1161   isend[3] = info->memory;  isend[4] = info->mallocs;
1162   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1163   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1164   isend[3] += info->memory;  isend[4] += info->mallocs;
1165   if (flag == MAT_LOCAL) {
1166     info->nz_used      = isend[0];
1167     info->nz_allocated = isend[1];
1168     info->nz_unneeded  = isend[2];
1169     info->memory       = isend[3];
1170     info->mallocs      = isend[4];
1171   } else if (flag == MAT_GLOBAL_MAX) {
1172     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1173     info->nz_used      = irecv[0];
1174     info->nz_allocated = irecv[1];
1175     info->nz_unneeded  = irecv[2];
1176     info->memory       = irecv[3];
1177     info->mallocs      = irecv[4];
1178   } else if (flag == MAT_GLOBAL_SUM) {
1179     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1180     info->nz_used      = irecv[0];
1181     info->nz_allocated = irecv[1];
1182     info->nz_unneeded  = irecv[2];
1183     info->memory       = irecv[3];
1184     info->mallocs      = irecv[4];
1185   }
1186   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1187   info->fill_ratio_needed = 0;
1188   info->factor_mallocs    = 0;
1189   info->rows_global       = (double)matin->rmap.N;
1190   info->columns_global    = (double)matin->cmap.N;
1191   info->rows_local        = (double)matin->rmap.n;
1192   info->columns_local     = (double)matin->cmap.N;
1193 
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 #undef __FUNCT__
1198 #define __FUNCT__ "MatSetOption_MPIAIJ"
1199 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1200 {
1201   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1202   PetscErrorCode ierr;
1203 
1204   PetscFunctionBegin;
1205   switch (op) {
1206   case MAT_NO_NEW_NONZERO_LOCATIONS:
1207   case MAT_YES_NEW_NONZERO_LOCATIONS:
1208   case MAT_COLUMNS_UNSORTED:
1209   case MAT_COLUMNS_SORTED:
1210   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1211   case MAT_KEEP_ZEROED_ROWS:
1212   case MAT_NEW_NONZERO_LOCATION_ERR:
1213   case MAT_USE_INODES:
1214   case MAT_DO_NOT_USE_INODES:
1215   case MAT_IGNORE_ZERO_ENTRIES:
1216     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1217     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1218     break;
1219   case MAT_ROW_ORIENTED:
1220     a->roworiented = PETSC_TRUE;
1221     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1222     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1223     break;
1224   case MAT_ROWS_SORTED:
1225   case MAT_ROWS_UNSORTED:
1226   case MAT_YES_NEW_DIAGONALS:
1227     ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr);
1228     break;
1229   case MAT_COLUMN_ORIENTED:
1230     a->roworiented = PETSC_FALSE;
1231     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1232     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1233     break;
1234   case MAT_IGNORE_OFF_PROC_ENTRIES:
1235     a->donotstash = PETSC_TRUE;
1236     break;
1237   case MAT_NO_NEW_DIAGONALS:
1238     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1239   case MAT_SYMMETRIC:
1240   case MAT_STRUCTURALLY_SYMMETRIC:
1241   case MAT_HERMITIAN:
1242   case MAT_SYMMETRY_ETERNAL:
1243     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1244     break;
1245   case MAT_NOT_SYMMETRIC:
1246   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1247   case MAT_NOT_HERMITIAN:
1248   case MAT_NOT_SYMMETRY_ETERNAL:
1249     break;
1250   default:
1251     SETERRQ(PETSC_ERR_SUP,"unknown option");
1252   }
1253   PetscFunctionReturn(0);
1254 }
1255 
1256 #undef __FUNCT__
1257 #define __FUNCT__ "MatGetRow_MPIAIJ"
1258 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1259 {
1260   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1261   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1262   PetscErrorCode ierr;
1263   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
1264   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
1265   PetscInt       *cmap,*idx_p;
1266 
1267   PetscFunctionBegin;
1268   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1269   mat->getrowactive = PETSC_TRUE;
1270 
1271   if (!mat->rowvalues && (idx || v)) {
1272     /*
1273         allocate enough space to hold information from the longest row.
1274     */
1275     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1276     PetscInt     max = 1,tmp;
1277     for (i=0; i<matin->rmap.n; i++) {
1278       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1279       if (max < tmp) { max = tmp; }
1280     }
1281     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1282     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1283   }
1284 
1285   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1286   lrow = row - rstart;
1287 
1288   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1289   if (!v)   {pvA = 0; pvB = 0;}
1290   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1291   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1292   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1293   nztot = nzA + nzB;
1294 
1295   cmap  = mat->garray;
1296   if (v  || idx) {
1297     if (nztot) {
1298       /* Sort by increasing column numbers, assuming A and B already sorted */
1299       PetscInt imark = -1;
1300       if (v) {
1301         *v = v_p = mat->rowvalues;
1302         for (i=0; i<nzB; i++) {
1303           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1304           else break;
1305         }
1306         imark = i;
1307         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1308         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1309       }
1310       if (idx) {
1311         *idx = idx_p = mat->rowindices;
1312         if (imark > -1) {
1313           for (i=0; i<imark; i++) {
1314             idx_p[i] = cmap[cworkB[i]];
1315           }
1316         } else {
1317           for (i=0; i<nzB; i++) {
1318             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1319             else break;
1320           }
1321           imark = i;
1322         }
1323         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1324         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1325       }
1326     } else {
1327       if (idx) *idx = 0;
1328       if (v)   *v   = 0;
1329     }
1330   }
1331   *nz = nztot;
1332   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1333   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1334   PetscFunctionReturn(0);
1335 }
1336 
1337 #undef __FUNCT__
1338 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1339 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1340 {
1341   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1342 
1343   PetscFunctionBegin;
1344   if (!aij->getrowactive) {
1345     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1346   }
1347   aij->getrowactive = PETSC_FALSE;
1348   PetscFunctionReturn(0);
1349 }
1350 
1351 #undef __FUNCT__
1352 #define __FUNCT__ "MatNorm_MPIAIJ"
1353 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1354 {
1355   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1356   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1357   PetscErrorCode ierr;
1358   PetscInt       i,j,cstart = mat->cmap.rstart;
1359   PetscReal      sum = 0.0;
1360   PetscScalar    *v;
1361 
1362   PetscFunctionBegin;
1363   if (aij->size == 1) {
1364     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1365   } else {
1366     if (type == NORM_FROBENIUS) {
1367       v = amat->a;
1368       for (i=0; i<amat->nz; i++) {
1369 #if defined(PETSC_USE_COMPLEX)
1370         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1371 #else
1372         sum += (*v)*(*v); v++;
1373 #endif
1374       }
1375       v = bmat->a;
1376       for (i=0; i<bmat->nz; i++) {
1377 #if defined(PETSC_USE_COMPLEX)
1378         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1379 #else
1380         sum += (*v)*(*v); v++;
1381 #endif
1382       }
1383       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1384       *norm = sqrt(*norm);
1385     } else if (type == NORM_1) { /* max column norm */
1386       PetscReal *tmp,*tmp2;
1387       PetscInt    *jj,*garray = aij->garray;
1388       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1389       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1390       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
1391       *norm = 0.0;
1392       v = amat->a; jj = amat->j;
1393       for (j=0; j<amat->nz; j++) {
1394         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1395       }
1396       v = bmat->a; jj = bmat->j;
1397       for (j=0; j<bmat->nz; j++) {
1398         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1399       }
1400       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1401       for (j=0; j<mat->cmap.N; j++) {
1402         if (tmp2[j] > *norm) *norm = tmp2[j];
1403       }
1404       ierr = PetscFree(tmp);CHKERRQ(ierr);
1405       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1406     } else if (type == NORM_INFINITY) { /* max row norm */
1407       PetscReal ntemp = 0.0;
1408       for (j=0; j<aij->A->rmap.n; j++) {
1409         v = amat->a + amat->i[j];
1410         sum = 0.0;
1411         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1412           sum += PetscAbsScalar(*v); v++;
1413         }
1414         v = bmat->a + bmat->i[j];
1415         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1416           sum += PetscAbsScalar(*v); v++;
1417         }
1418         if (sum > ntemp) ntemp = sum;
1419       }
1420       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1421     } else {
1422       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1423     }
1424   }
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatTranspose_MPIAIJ"
1430 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1431 {
1432   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1433   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1434   PetscErrorCode ierr;
1435   PetscInt       M = A->rmap.N,N = A->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
1436   Mat            B;
1437   PetscScalar    *array;
1438 
1439   PetscFunctionBegin;
1440   if (!matout && M != N) {
1441     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1442   }
1443 
1444   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1445   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1446   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1447   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1448 
1449   /* copy over the A part */
1450   Aloc = (Mat_SeqAIJ*)a->A->data;
1451   m = a->A->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1452   row = A->rmap.rstart;
1453   for (i=0; i<ai[m]; i++) {aj[i] += A->cmap.rstart ;}
1454   for (i=0; i<m; i++) {
1455     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1456     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1457   }
1458   aj = Aloc->j;
1459   for (i=0; i<ai[m]; i++) {aj[i] -= A->cmap.rstart ;}
1460 
1461   /* copy over the B part */
1462   Aloc = (Mat_SeqAIJ*)a->B->data;
1463   m = a->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1464   row  = A->rmap.rstart;
1465   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1466   ct   = cols;
1467   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1468   for (i=0; i<m; i++) {
1469     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1470     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1471   }
1472   ierr = PetscFree(ct);CHKERRQ(ierr);
1473   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1474   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1475   if (matout) {
1476     *matout = B;
1477   } else {
1478     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1479   }
1480   PetscFunctionReturn(0);
1481 }
1482 
1483 #undef __FUNCT__
1484 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1485 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1486 {
1487   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1488   Mat            a = aij->A,b = aij->B;
1489   PetscErrorCode ierr;
1490   PetscInt       s1,s2,s3;
1491 
1492   PetscFunctionBegin;
1493   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1494   if (rr) {
1495     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1496     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1497     /* Overlap communication with computation. */
1498     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1499   }
1500   if (ll) {
1501     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1502     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1503     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1504   }
1505   /* scale  the diagonal block */
1506   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1507 
1508   if (rr) {
1509     /* Do a scatter end and then right scale the off-diagonal block */
1510     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1511     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1512   }
1513 
1514   PetscFunctionReturn(0);
1515 }
1516 
1517 
1518 #undef __FUNCT__
1519 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1520 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1521 {
1522   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1523   PetscErrorCode ierr;
1524 
1525   PetscFunctionBegin;
1526   if (!a->rank) {
1527     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1528   }
1529   PetscFunctionReturn(0);
1530 }
1531 
1532 #undef __FUNCT__
1533 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1534 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1535 {
1536   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1537   PetscErrorCode ierr;
1538 
1539   PetscFunctionBegin;
1540   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1541   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1542   PetscFunctionReturn(0);
1543 }
1544 #undef __FUNCT__
1545 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1546 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1547 {
1548   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1549   PetscErrorCode ierr;
1550 
1551   PetscFunctionBegin;
1552   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1553   PetscFunctionReturn(0);
1554 }
1555 
1556 #undef __FUNCT__
1557 #define __FUNCT__ "MatEqual_MPIAIJ"
1558 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1559 {
1560   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1561   Mat            a,b,c,d;
1562   PetscTruth     flg;
1563   PetscErrorCode ierr;
1564 
1565   PetscFunctionBegin;
1566   a = matA->A; b = matA->B;
1567   c = matB->A; d = matB->B;
1568 
1569   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1570   if (flg) {
1571     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1572   }
1573   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1574   PetscFunctionReturn(0);
1575 }
1576 
1577 #undef __FUNCT__
1578 #define __FUNCT__ "MatCopy_MPIAIJ"
1579 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1580 {
1581   PetscErrorCode ierr;
1582   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1583   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1584 
1585   PetscFunctionBegin;
1586   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1587   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1588     /* because of the column compression in the off-processor part of the matrix a->B,
1589        the number of columns in a->B and b->B may be different, hence we cannot call
1590        the MatCopy() directly on the two parts. If need be, we can provide a more
1591        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1592        then copying the submatrices */
1593     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1594   } else {
1595     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1596     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1597   }
1598   PetscFunctionReturn(0);
1599 }
1600 
1601 #undef __FUNCT__
1602 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1603 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1604 {
1605   PetscErrorCode ierr;
1606 
1607   PetscFunctionBegin;
1608   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1609   PetscFunctionReturn(0);
1610 }
1611 
1612 #include "petscblaslapack.h"
1613 #undef __FUNCT__
1614 #define __FUNCT__ "MatAXPY_MPIAIJ"
1615 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1616 {
1617   PetscErrorCode ierr;
1618   PetscInt       i;
1619   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1620   PetscBLASInt   bnz,one=1;
1621   Mat_SeqAIJ     *x,*y;
1622 
1623   PetscFunctionBegin;
1624   if (str == SAME_NONZERO_PATTERN) {
1625     PetscScalar alpha = a;
1626     x = (Mat_SeqAIJ *)xx->A->data;
1627     y = (Mat_SeqAIJ *)yy->A->data;
1628     bnz = (PetscBLASInt)x->nz;
1629     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1630     x = (Mat_SeqAIJ *)xx->B->data;
1631     y = (Mat_SeqAIJ *)yy->B->data;
1632     bnz = (PetscBLASInt)x->nz;
1633     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1634   } else if (str == SUBSET_NONZERO_PATTERN) {
1635     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1636 
1637     x = (Mat_SeqAIJ *)xx->B->data;
1638     y = (Mat_SeqAIJ *)yy->B->data;
1639     if (y->xtoy && y->XtoY != xx->B) {
1640       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1641       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1642     }
1643     if (!y->xtoy) { /* get xtoy */
1644       ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1645       y->XtoY = xx->B;
1646     }
1647     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1648   } else {
1649     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1650   }
1651   PetscFunctionReturn(0);
1652 }
1653 
1654 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1655 
1656 #undef __FUNCT__
1657 #define __FUNCT__ "MatConjugate_MPIAIJ"
1658 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1659 {
1660 #if defined(PETSC_USE_COMPLEX)
1661   PetscErrorCode ierr;
1662   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1663 
1664   PetscFunctionBegin;
1665   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1666   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1667 #else
1668   PetscFunctionBegin;
1669 #endif
1670   PetscFunctionReturn(0);
1671 }
1672 
1673 #undef __FUNCT__
1674 #define __FUNCT__ "MatRealPart_MPIAIJ"
1675 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1676 {
1677   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1678   PetscErrorCode ierr;
1679 
1680   PetscFunctionBegin;
1681   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1682   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1683   PetscFunctionReturn(0);
1684 }
1685 
1686 #undef __FUNCT__
1687 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1688 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1689 {
1690   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1691   PetscErrorCode ierr;
1692 
1693   PetscFunctionBegin;
1694   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1695   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1696   PetscFunctionReturn(0);
1697 }
1698 
1699 /* -------------------------------------------------------------------*/
1700 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1701        MatGetRow_MPIAIJ,
1702        MatRestoreRow_MPIAIJ,
1703        MatMult_MPIAIJ,
1704 /* 4*/ MatMultAdd_MPIAIJ,
1705        MatMultTranspose_MPIAIJ,
1706        MatMultTransposeAdd_MPIAIJ,
1707        0,
1708        0,
1709        0,
1710 /*10*/ 0,
1711        0,
1712        0,
1713        MatRelax_MPIAIJ,
1714        MatTranspose_MPIAIJ,
1715 /*15*/ MatGetInfo_MPIAIJ,
1716        MatEqual_MPIAIJ,
1717        MatGetDiagonal_MPIAIJ,
1718        MatDiagonalScale_MPIAIJ,
1719        MatNorm_MPIAIJ,
1720 /*20*/ MatAssemblyBegin_MPIAIJ,
1721        MatAssemblyEnd_MPIAIJ,
1722        0,
1723        MatSetOption_MPIAIJ,
1724        MatZeroEntries_MPIAIJ,
1725 /*25*/ MatZeroRows_MPIAIJ,
1726        0,
1727        0,
1728        0,
1729        0,
1730 /*30*/ MatSetUpPreallocation_MPIAIJ,
1731        0,
1732        0,
1733        0,
1734        0,
1735 /*35*/ MatDuplicate_MPIAIJ,
1736        0,
1737        0,
1738        0,
1739        0,
1740 /*40*/ MatAXPY_MPIAIJ,
1741        MatGetSubMatrices_MPIAIJ,
1742        MatIncreaseOverlap_MPIAIJ,
1743        MatGetValues_MPIAIJ,
1744        MatCopy_MPIAIJ,
1745 /*45*/ MatPrintHelp_MPIAIJ,
1746        MatScale_MPIAIJ,
1747        0,
1748        0,
1749        0,
1750 /*50*/ MatSetBlockSize_MPIAIJ,
1751        0,
1752        0,
1753        0,
1754        0,
1755 /*55*/ MatFDColoringCreate_MPIAIJ,
1756        0,
1757        MatSetUnfactored_MPIAIJ,
1758        MatPermute_MPIAIJ,
1759        0,
1760 /*60*/ MatGetSubMatrix_MPIAIJ,
1761        MatDestroy_MPIAIJ,
1762        MatView_MPIAIJ,
1763        0,
1764        0,
1765 /*65*/ 0,
1766        0,
1767        0,
1768        0,
1769        0,
1770 /*70*/ 0,
1771        0,
1772        MatSetColoring_MPIAIJ,
1773 #if defined(PETSC_HAVE_ADIC)
1774        MatSetValuesAdic_MPIAIJ,
1775 #else
1776        0,
1777 #endif
1778        MatSetValuesAdifor_MPIAIJ,
1779 /*75*/ 0,
1780        0,
1781        0,
1782        0,
1783        0,
1784 /*80*/ 0,
1785        0,
1786        0,
1787        0,
1788 /*84*/ MatLoad_MPIAIJ,
1789        0,
1790        0,
1791        0,
1792        0,
1793        0,
1794 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1795        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1796        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1797        MatPtAP_Basic,
1798        MatPtAPSymbolic_MPIAIJ,
1799 /*95*/ MatPtAPNumeric_MPIAIJ,
1800        0,
1801        0,
1802        0,
1803        0,
1804 /*100*/0,
1805        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1806        MatPtAPNumeric_MPIAIJ_MPIAIJ,
1807        MatConjugate_MPIAIJ,
1808        0,
1809 /*105*/MatSetValuesRow_MPIAIJ,
1810        MatRealPart_MPIAIJ,
1811        MatImaginaryPart_MPIAIJ};
1812 
1813 /* ----------------------------------------------------------------------------------------*/
1814 
1815 EXTERN_C_BEGIN
1816 #undef __FUNCT__
1817 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1818 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
1819 {
1820   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1821   PetscErrorCode ierr;
1822 
1823   PetscFunctionBegin;
1824   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1825   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1826   PetscFunctionReturn(0);
1827 }
1828 EXTERN_C_END
1829 
1830 EXTERN_C_BEGIN
1831 #undef __FUNCT__
1832 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1833 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
1834 {
1835   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1836   PetscErrorCode ierr;
1837 
1838   PetscFunctionBegin;
1839   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1840   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1841   PetscFunctionReturn(0);
1842 }
1843 EXTERN_C_END
1844 
1845 #include "petscpc.h"
1846 EXTERN_C_BEGIN
1847 #undef __FUNCT__
1848 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1849 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1850 {
1851   Mat_MPIAIJ     *b;
1852   PetscErrorCode ierr;
1853   PetscInt       i;
1854 
1855   PetscFunctionBegin;
1856   B->preallocated = PETSC_TRUE;
1857   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1858   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1859   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1860   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1861 
1862   B->rmap.bs = B->cmap.bs = 1;
1863   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
1864   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
1865   if (d_nnz) {
1866     for (i=0; i<B->rmap.n; i++) {
1867       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1868     }
1869   }
1870   if (o_nnz) {
1871     for (i=0; i<B->rmap.n; i++) {
1872       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1873     }
1874   }
1875   b = (Mat_MPIAIJ*)B->data;
1876 
1877   /* Explicitly create 2 MATSEQAIJ matrices. */
1878   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1879   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
1880   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
1881   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1882   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1883   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
1884   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
1885   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1886 
1887   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1888   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1889 
1890   PetscFunctionReturn(0);
1891 }
1892 EXTERN_C_END
1893 
1894 #undef __FUNCT__
1895 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1896 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1897 {
1898   Mat            mat;
1899   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1900   PetscErrorCode ierr;
1901 
1902   PetscFunctionBegin;
1903   *newmat       = 0;
1904   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
1905   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
1906   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1907   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1908   a    = (Mat_MPIAIJ*)mat->data;
1909 
1910   mat->factor       = matin->factor;
1911   mat->rmap.bs      = matin->rmap.bs;
1912   mat->assembled    = PETSC_TRUE;
1913   mat->insertmode   = NOT_SET_VALUES;
1914   mat->preallocated = PETSC_TRUE;
1915 
1916   a->size           = oldmat->size;
1917   a->rank           = oldmat->rank;
1918   a->donotstash     = oldmat->donotstash;
1919   a->roworiented    = oldmat->roworiented;
1920   a->rowindices     = 0;
1921   a->rowvalues      = 0;
1922   a->getrowactive   = PETSC_FALSE;
1923 
1924   ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
1925   ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
1926 
1927   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1928   if (oldmat->colmap) {
1929 #if defined (PETSC_USE_CTABLE)
1930     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1931 #else
1932     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1933     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
1934     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
1935 #endif
1936   } else a->colmap = 0;
1937   if (oldmat->garray) {
1938     PetscInt len;
1939     len  = oldmat->B->cmap.n;
1940     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1941     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
1942     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1943   } else a->garray = 0;
1944 
1945   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1946   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
1947   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1948   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
1949   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1950   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1951   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1952   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
1953   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1954   *newmat = mat;
1955   PetscFunctionReturn(0);
1956 }
1957 
1958 #include "petscsys.h"
1959 
1960 #undef __FUNCT__
1961 #define __FUNCT__ "MatLoad_MPIAIJ"
1962 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
1963 {
1964   Mat            A;
1965   PetscScalar    *vals,*svals;
1966   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1967   MPI_Status     status;
1968   PetscErrorCode ierr;
1969   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1970   PetscInt       i,nz,j,rstart,rend,mmax;
1971   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1972   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1973   PetscInt       cend,cstart,n,*rowners;
1974   int            fd;
1975 
1976   PetscFunctionBegin;
1977   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1978   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1979   if (!rank) {
1980     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1981     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1982     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1983   }
1984 
1985   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1986   M = header[1]; N = header[2];
1987   /* determine ownership of all rows */
1988   m    = M/size + ((M % size) > rank);
1989   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1990   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1991 
1992   /* First process needs enough room for process with most rows */
1993   if (!rank) {
1994     mmax       = rowners[1];
1995     for (i=2; i<size; i++) {
1996       mmax = PetscMax(mmax,rowners[i]);
1997     }
1998   } else mmax = m;
1999 
2000   rowners[0] = 0;
2001   for (i=2; i<=size; i++) {
2002     mmax       = PetscMax(mmax,rowners[i]);
2003     rowners[i] += rowners[i-1];
2004   }
2005   rstart = rowners[rank];
2006   rend   = rowners[rank+1];
2007 
2008   /* distribute row lengths to all processors */
2009   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2010   if (!rank) {
2011     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2012     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2013     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2014     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2015     for (j=0; j<m; j++) {
2016       procsnz[0] += ourlens[j];
2017     }
2018     for (i=1; i<size; i++) {
2019       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2020       /* calculate the number of nonzeros on each processor */
2021       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2022         procsnz[i] += rowlengths[j];
2023       }
2024       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2025     }
2026     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2027   } else {
2028     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2029   }
2030 
2031   if (!rank) {
2032     /* determine max buffer needed and allocate it */
2033     maxnz = 0;
2034     for (i=0; i<size; i++) {
2035       maxnz = PetscMax(maxnz,procsnz[i]);
2036     }
2037     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2038 
2039     /* read in my part of the matrix column indices  */
2040     nz   = procsnz[0];
2041     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2042     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2043 
2044     /* read in every one elses and ship off */
2045     for (i=1; i<size; i++) {
2046       nz   = procsnz[i];
2047       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2048       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2049     }
2050     ierr = PetscFree(cols);CHKERRQ(ierr);
2051   } else {
2052     /* determine buffer space needed for message */
2053     nz = 0;
2054     for (i=0; i<m; i++) {
2055       nz += ourlens[i];
2056     }
2057     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2058 
2059     /* receive message of column indices*/
2060     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2061     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2062     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2063   }
2064 
2065   /* determine column ownership if matrix is not square */
2066   if (N != M) {
2067     n      = N/size + ((N % size) > rank);
2068     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2069     cstart = cend - n;
2070   } else {
2071     cstart = rstart;
2072     cend   = rend;
2073     n      = cend - cstart;
2074   }
2075 
2076   /* loop over local rows, determining number of off diagonal entries */
2077   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2078   jj = 0;
2079   for (i=0; i<m; i++) {
2080     for (j=0; j<ourlens[i]; j++) {
2081       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2082       jj++;
2083     }
2084   }
2085 
2086   /* create our matrix */
2087   for (i=0; i<m; i++) {
2088     ourlens[i] -= offlens[i];
2089   }
2090   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2091   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2092   ierr = MatSetType(A,type);CHKERRQ(ierr);
2093   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2094 
2095   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2096   for (i=0; i<m; i++) {
2097     ourlens[i] += offlens[i];
2098   }
2099 
2100   if (!rank) {
2101     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2102 
2103     /* read in my part of the matrix numerical values  */
2104     nz   = procsnz[0];
2105     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2106 
2107     /* insert into matrix */
2108     jj      = rstart;
2109     smycols = mycols;
2110     svals   = vals;
2111     for (i=0; i<m; i++) {
2112       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2113       smycols += ourlens[i];
2114       svals   += ourlens[i];
2115       jj++;
2116     }
2117 
2118     /* read in other processors and ship out */
2119     for (i=1; i<size; i++) {
2120       nz   = procsnz[i];
2121       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2122       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2123     }
2124     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2125   } else {
2126     /* receive numeric values */
2127     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2128 
2129     /* receive message of values*/
2130     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2131     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2132     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2133 
2134     /* insert into matrix */
2135     jj      = rstart;
2136     smycols = mycols;
2137     svals   = vals;
2138     for (i=0; i<m; i++) {
2139       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2140       smycols += ourlens[i];
2141       svals   += ourlens[i];
2142       jj++;
2143     }
2144   }
2145   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2146   ierr = PetscFree(vals);CHKERRQ(ierr);
2147   ierr = PetscFree(mycols);CHKERRQ(ierr);
2148   ierr = PetscFree(rowners);CHKERRQ(ierr);
2149 
2150   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2151   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2152   *newmat = A;
2153   PetscFunctionReturn(0);
2154 }
2155 
2156 #undef __FUNCT__
2157 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2158 /*
2159     Not great since it makes two copies of the submatrix, first an SeqAIJ
2160   in local and then by concatenating the local matrices the end result.
2161   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2162 */
2163 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2164 {
2165   PetscErrorCode ierr;
2166   PetscMPIInt    rank,size;
2167   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2168   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2169   Mat            *local,M,Mreuse;
2170   PetscScalar    *vwork,*aa;
2171   MPI_Comm       comm = mat->comm;
2172   Mat_SeqAIJ     *aij;
2173 
2174 
2175   PetscFunctionBegin;
2176   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2177   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2178 
2179   if (call ==  MAT_REUSE_MATRIX) {
2180     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2181     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2182     local = &Mreuse;
2183     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2184   } else {
2185     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2186     Mreuse = *local;
2187     ierr   = PetscFree(local);CHKERRQ(ierr);
2188   }
2189 
2190   /*
2191       m - number of local rows
2192       n - number of columns (same on all processors)
2193       rstart - first row in new global matrix generated
2194   */
2195   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2196   if (call == MAT_INITIAL_MATRIX) {
2197     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2198     ii  = aij->i;
2199     jj  = aij->j;
2200 
2201     /*
2202         Determine the number of non-zeros in the diagonal and off-diagonal
2203         portions of the matrix in order to do correct preallocation
2204     */
2205 
2206     /* first get start and end of "diagonal" columns */
2207     if (csize == PETSC_DECIDE) {
2208       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2209       if (mglobal == n) { /* square matrix */
2210 	nlocal = m;
2211       } else {
2212         nlocal = n/size + ((n % size) > rank);
2213       }
2214     } else {
2215       nlocal = csize;
2216     }
2217     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2218     rstart = rend - nlocal;
2219     if (rank == size - 1 && rend != n) {
2220       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2221     }
2222 
2223     /* next, compute all the lengths */
2224     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2225     olens = dlens + m;
2226     for (i=0; i<m; i++) {
2227       jend = ii[i+1] - ii[i];
2228       olen = 0;
2229       dlen = 0;
2230       for (j=0; j<jend; j++) {
2231         if (*jj < rstart || *jj >= rend) olen++;
2232         else dlen++;
2233         jj++;
2234       }
2235       olens[i] = olen;
2236       dlens[i] = dlen;
2237     }
2238     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2239     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2240     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2241     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2242     ierr = PetscFree(dlens);CHKERRQ(ierr);
2243   } else {
2244     PetscInt ml,nl;
2245 
2246     M = *newmat;
2247     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2248     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2249     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2250     /*
2251          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2252        rather than the slower MatSetValues().
2253     */
2254     M->was_assembled = PETSC_TRUE;
2255     M->assembled     = PETSC_FALSE;
2256   }
2257   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2258   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2259   ii  = aij->i;
2260   jj  = aij->j;
2261   aa  = aij->a;
2262   for (i=0; i<m; i++) {
2263     row   = rstart + i;
2264     nz    = ii[i+1] - ii[i];
2265     cwork = jj;     jj += nz;
2266     vwork = aa;     aa += nz;
2267     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2268   }
2269 
2270   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2271   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2272   *newmat = M;
2273 
2274   /* save submatrix used in processor for next request */
2275   if (call ==  MAT_INITIAL_MATRIX) {
2276     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2277     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2278   }
2279 
2280   PetscFunctionReturn(0);
2281 }
2282 
2283 EXTERN_C_BEGIN
2284 #undef __FUNCT__
2285 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2286 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2287 {
2288   PetscInt       m,cstart, cend,j,nnz,i,d;
2289   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
2290   const PetscInt *JJ;
2291   PetscScalar    *values;
2292   PetscErrorCode ierr;
2293 
2294   PetscFunctionBegin;
2295   if (I[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"I[0] must be 0 it is %D",I[0]);
2296 
2297   B->rmap.bs = B->cmap.bs = 1;
2298   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
2299   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
2300   m      = B->rmap.n;
2301   cstart = B->cmap.rstart;
2302   cend   = B->cmap.rend;
2303   rstart = B->rmap.rstart;
2304 
2305   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2306   o_nnz = d_nnz + m;
2307 
2308   for (i=0; i<m; i++) {
2309     nnz     = I[i+1]- I[i];
2310     JJ      = J + I[i];
2311     nnz_max = PetscMax(nnz_max,nnz);
2312     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
2313     for (j=0; j<nnz; j++) {
2314       if (*JJ >= cstart) break;
2315       JJ++;
2316     }
2317     d = 0;
2318     for (; j<nnz; j++) {
2319       if (*JJ++ >= cend) break;
2320       d++;
2321     }
2322     d_nnz[i] = d;
2323     o_nnz[i] = nnz - d;
2324   }
2325   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2326   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2327 
2328   if (v) values = (PetscScalar*)v;
2329   else {
2330     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2331     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2332   }
2333 
2334   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2335   for (i=0; i<m; i++) {
2336     ii   = i + rstart;
2337     nnz  = I[i+1]- I[i];
2338     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2339   }
2340   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2341   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2342   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2343 
2344   if (!v) {
2345     ierr = PetscFree(values);CHKERRQ(ierr);
2346   }
2347   PetscFunctionReturn(0);
2348 }
2349 EXTERN_C_END
2350 
2351 #undef __FUNCT__
2352 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2353 /*@
2354    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2355    (the default parallel PETSc format).
2356 
2357    Collective on MPI_Comm
2358 
2359    Input Parameters:
2360 +  B - the matrix
2361 .  i - the indices into j for the start of each local row (starts with zero)
2362 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2363 -  v - optional values in the matrix
2364 
2365    Level: developer
2366 
2367 .keywords: matrix, aij, compressed row, sparse, parallel
2368 
2369 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2370 @*/
2371 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2372 {
2373   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2374 
2375   PetscFunctionBegin;
2376   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2377   if (f) {
2378     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2379   }
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 #undef __FUNCT__
2384 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2385 /*@C
2386    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2387    (the default parallel PETSc format).  For good matrix assembly performance
2388    the user should preallocate the matrix storage by setting the parameters
2389    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2390    performance can be increased by more than a factor of 50.
2391 
2392    Collective on MPI_Comm
2393 
2394    Input Parameters:
2395 +  A - the matrix
2396 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2397            (same value is used for all local rows)
2398 .  d_nnz - array containing the number of nonzeros in the various rows of the
2399            DIAGONAL portion of the local submatrix (possibly different for each row)
2400            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2401            The size of this array is equal to the number of local rows, i.e 'm'.
2402            You must leave room for the diagonal entry even if it is zero.
2403 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2404            submatrix (same value is used for all local rows).
2405 -  o_nnz - array containing the number of nonzeros in the various rows of the
2406            OFF-DIAGONAL portion of the local submatrix (possibly different for
2407            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2408            structure. The size of this array is equal to the number
2409            of local rows, i.e 'm'.
2410 
2411    If the *_nnz parameter is given then the *_nz parameter is ignored
2412 
2413    The AIJ format (also called the Yale sparse matrix format or
2414    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2415    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2416 
2417    The parallel matrix is partitioned such that the first m0 rows belong to
2418    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2419    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2420 
2421    The DIAGONAL portion of the local submatrix of a processor can be defined
2422    as the submatrix which is obtained by extraction the part corresponding
2423    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2424    first row that belongs to the processor, and r2 is the last row belonging
2425    to the this processor. This is a square mxm matrix. The remaining portion
2426    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2427 
2428    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2429 
2430    Example usage:
2431 
2432    Consider the following 8x8 matrix with 34 non-zero values, that is
2433    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2434    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2435    as follows:
2436 
2437 .vb
2438             1  2  0  |  0  3  0  |  0  4
2439     Proc0   0  5  6  |  7  0  0  |  8  0
2440             9  0 10  | 11  0  0  | 12  0
2441     -------------------------------------
2442            13  0 14  | 15 16 17  |  0  0
2443     Proc1   0 18  0  | 19 20 21  |  0  0
2444             0  0  0  | 22 23  0  | 24  0
2445     -------------------------------------
2446     Proc2  25 26 27  |  0  0 28  | 29  0
2447            30  0  0  | 31 32 33  |  0 34
2448 .ve
2449 
2450    This can be represented as a collection of submatrices as:
2451 
2452 .vb
2453       A B C
2454       D E F
2455       G H I
2456 .ve
2457 
2458    Where the submatrices A,B,C are owned by proc0, D,E,F are
2459    owned by proc1, G,H,I are owned by proc2.
2460 
2461    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2462    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2463    The 'M','N' parameters are 8,8, and have the same values on all procs.
2464 
2465    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2466    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2467    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2468    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2469    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
2470    matrix, ans [DF] as another SeqAIJ matrix.
2471 
2472    When d_nz, o_nz parameters are specified, d_nz storage elements are
2473    allocated for every row of the local diagonal submatrix, and o_nz
2474    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2475    One way to choose d_nz and o_nz is to use the max nonzerors per local
2476    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2477    In this case, the values of d_nz,o_nz are:
2478 .vb
2479      proc0 : dnz = 2, o_nz = 2
2480      proc1 : dnz = 3, o_nz = 2
2481      proc2 : dnz = 1, o_nz = 4
2482 .ve
2483    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2484    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2485    for proc3. i.e we are using 12+15+10=37 storage locations to store
2486    34 values.
2487 
2488    When d_nnz, o_nnz parameters are specified, the storage is specified
2489    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2490    In the above case the values for d_nnz,o_nnz are:
2491 .vb
2492      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2493      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2494      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2495 .ve
2496    Here the space allocated is sum of all the above values i.e 34, and
2497    hence pre-allocation is perfect.
2498 
2499    Level: intermediate
2500 
2501 .keywords: matrix, aij, compressed row, sparse, parallel
2502 
2503 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2504           MPIAIJ
2505 @*/
2506 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2507 {
2508   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2509 
2510   PetscFunctionBegin;
2511   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2512   if (f) {
2513     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2514   }
2515   PetscFunctionReturn(0);
2516 }
2517 
2518 #undef __FUNCT__
2519 #define __FUNCT__ "MatCreateMPIAIJ"
2520 /*@C
2521    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2522    (the default parallel PETSc format).  For good matrix assembly performance
2523    the user should preallocate the matrix storage by setting the parameters
2524    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2525    performance can be increased by more than a factor of 50.
2526 
2527    Collective on MPI_Comm
2528 
2529    Input Parameters:
2530 +  comm - MPI communicator
2531 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2532            This value should be the same as the local size used in creating the
2533            y vector for the matrix-vector product y = Ax.
2534 .  n - This value should be the same as the local size used in creating the
2535        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2536        calculated if N is given) For square matrices n is almost always m.
2537 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2538 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2539 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2540            (same value is used for all local rows)
2541 .  d_nnz - array containing the number of nonzeros in the various rows of the
2542            DIAGONAL portion of the local submatrix (possibly different for each row)
2543            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2544            The size of this array is equal to the number of local rows, i.e 'm'.
2545            You must leave room for the diagonal entry even if it is zero.
2546 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2547            submatrix (same value is used for all local rows).
2548 -  o_nnz - array containing the number of nonzeros in the various rows of the
2549            OFF-DIAGONAL portion of the local submatrix (possibly different for
2550            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2551            structure. The size of this array is equal to the number
2552            of local rows, i.e 'm'.
2553 
2554    Output Parameter:
2555 .  A - the matrix
2556 
2557    Notes:
2558    If the *_nnz parameter is given then the *_nz parameter is ignored
2559 
2560    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2561    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2562    storage requirements for this matrix.
2563 
2564    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
2565    processor than it must be used on all processors that share the object for
2566    that argument.
2567 
2568    The user MUST specify either the local or global matrix dimensions
2569    (possibly both).
2570 
2571    The parallel matrix is partitioned such that the first m0 rows belong to
2572    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2573    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2574 
2575    The DIAGONAL portion of the local submatrix of a processor can be defined
2576    as the submatrix which is obtained by extraction the part corresponding
2577    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2578    first row that belongs to the processor, and r2 is the last row belonging
2579    to the this processor. This is a square mxm matrix. The remaining portion
2580    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2581 
2582    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2583 
2584    When calling this routine with a single process communicator, a matrix of
2585    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2586    type of communicator, use the construction mechanism:
2587      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
2588 
2589    By default, this format uses inodes (identical nodes) when possible.
2590    We search for consecutive rows with the same nonzero structure, thereby
2591    reusing matrix information to achieve increased efficiency.
2592 
2593    Options Database Keys:
2594 +  -mat_no_inode  - Do not use inodes
2595 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
2596 -  -mat_aij_oneindex - Internally use indexing starting at 1
2597         rather than 0.  Note that when calling MatSetValues(),
2598         the user still MUST index entries starting at 0!
2599 
2600 
2601    Example usage:
2602 
2603    Consider the following 8x8 matrix with 34 non-zero values, that is
2604    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2605    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2606    as follows:
2607 
2608 .vb
2609             1  2  0  |  0  3  0  |  0  4
2610     Proc0   0  5  6  |  7  0  0  |  8  0
2611             9  0 10  | 11  0  0  | 12  0
2612     -------------------------------------
2613            13  0 14  | 15 16 17  |  0  0
2614     Proc1   0 18  0  | 19 20 21  |  0  0
2615             0  0  0  | 22 23  0  | 24  0
2616     -------------------------------------
2617     Proc2  25 26 27  |  0  0 28  | 29  0
2618            30  0  0  | 31 32 33  |  0 34
2619 .ve
2620 
2621    This can be represented as a collection of submatrices as:
2622 
2623 .vb
2624       A B C
2625       D E F
2626       G H I
2627 .ve
2628 
2629    Where the submatrices A,B,C are owned by proc0, D,E,F are
2630    owned by proc1, G,H,I are owned by proc2.
2631 
2632    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2633    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2634    The 'M','N' parameters are 8,8, and have the same values on all procs.
2635 
2636    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2637    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2638    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2639    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2640    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
2641    matrix, ans [DF] as another SeqAIJ matrix.
2642 
2643    When d_nz, o_nz parameters are specified, d_nz storage elements are
2644    allocated for every row of the local diagonal submatrix, and o_nz
2645    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2646    One way to choose d_nz and o_nz is to use the max nonzerors per local
2647    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2648    In this case, the values of d_nz,o_nz are:
2649 .vb
2650      proc0 : dnz = 2, o_nz = 2
2651      proc1 : dnz = 3, o_nz = 2
2652      proc2 : dnz = 1, o_nz = 4
2653 .ve
2654    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2655    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2656    for proc3. i.e we are using 12+15+10=37 storage locations to store
2657    34 values.
2658 
2659    When d_nnz, o_nnz parameters are specified, the storage is specified
2660    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2661    In the above case the values for d_nnz,o_nnz are:
2662 .vb
2663      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2664      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2665      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2666 .ve
2667    Here the space allocated is sum of all the above values i.e 34, and
2668    hence pre-allocation is perfect.
2669 
2670    Level: intermediate
2671 
2672 .keywords: matrix, aij, compressed row, sparse, parallel
2673 
2674 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2675           MPIAIJ
2676 @*/
2677 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2678 {
2679   PetscErrorCode ierr;
2680   PetscMPIInt    size;
2681 
2682   PetscFunctionBegin;
2683   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2684   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2685   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2686   if (size > 1) {
2687     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2688     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2689   } else {
2690     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2691     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2692   }
2693   PetscFunctionReturn(0);
2694 }
2695 
2696 #undef __FUNCT__
2697 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2698 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2699 {
2700   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2701 
2702   PetscFunctionBegin;
2703   *Ad     = a->A;
2704   *Ao     = a->B;
2705   *colmap = a->garray;
2706   PetscFunctionReturn(0);
2707 }
2708 
2709 #undef __FUNCT__
2710 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2711 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2712 {
2713   PetscErrorCode ierr;
2714   PetscInt       i;
2715   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2716 
2717   PetscFunctionBegin;
2718   if (coloring->ctype == IS_COLORING_LOCAL) {
2719     ISColoringValue *allcolors,*colors;
2720     ISColoring      ocoloring;
2721 
2722     /* set coloring for diagonal portion */
2723     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2724 
2725     /* set coloring for off-diagonal portion */
2726     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2727     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2728     for (i=0; i<a->B->cmap.n; i++) {
2729       colors[i] = allcolors[a->garray[i]];
2730     }
2731     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2732     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
2733     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2734     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2735   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2736     ISColoringValue *colors;
2737     PetscInt        *larray;
2738     ISColoring      ocoloring;
2739 
2740     /* set coloring for diagonal portion */
2741     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2742     for (i=0; i<a->A->cmap.n; i++) {
2743       larray[i] = i + A->cmap.rstart;
2744     }
2745     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2746     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2747     for (i=0; i<a->A->cmap.n; i++) {
2748       colors[i] = coloring->colors[larray[i]];
2749     }
2750     ierr = PetscFree(larray);CHKERRQ(ierr);
2751     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
2752     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2753     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2754 
2755     /* set coloring for off-diagonal portion */
2756     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2757     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2758     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2759     for (i=0; i<a->B->cmap.n; i++) {
2760       colors[i] = coloring->colors[larray[i]];
2761     }
2762     ierr = PetscFree(larray);CHKERRQ(ierr);
2763     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
2764     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2765     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2766   } else {
2767     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2768   }
2769 
2770   PetscFunctionReturn(0);
2771 }
2772 
2773 #if defined(PETSC_HAVE_ADIC)
2774 #undef __FUNCT__
2775 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2776 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2777 {
2778   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2779   PetscErrorCode ierr;
2780 
2781   PetscFunctionBegin;
2782   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2783   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2784   PetscFunctionReturn(0);
2785 }
2786 #endif
2787 
2788 #undef __FUNCT__
2789 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2790 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2791 {
2792   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2793   PetscErrorCode ierr;
2794 
2795   PetscFunctionBegin;
2796   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2797   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2798   PetscFunctionReturn(0);
2799 }
2800 
2801 #undef __FUNCT__
2802 #define __FUNCT__ "MatMerge"
2803 /*@C
2804       MatMerge - Creates a single large PETSc matrix by concatinating sequential
2805                  matrices from each processor
2806 
2807     Collective on MPI_Comm
2808 
2809    Input Parameters:
2810 +    comm - the communicators the parallel matrix will live on
2811 .    inmat - the input sequential matrices
2812 .    n - number of local columns (or PETSC_DECIDE)
2813 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2814 
2815    Output Parameter:
2816 .    outmat - the parallel matrix generated
2817 
2818     Level: advanced
2819 
2820    Notes: The number of columns of the matrix in EACH processor MUST be the same.
2821 
2822 @*/
2823 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2824 {
2825   PetscErrorCode ierr;
2826   PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
2827   PetscInt       *indx;
2828   PetscScalar    *values;
2829 
2830   PetscFunctionBegin;
2831   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2832   if (scall == MAT_INITIAL_MATRIX){
2833     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2834     if (n == PETSC_DECIDE){
2835       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
2836     }
2837     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2838     rstart -= m;
2839 
2840     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2841     for (i=0;i<m;i++) {
2842       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2843       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2844       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2845     }
2846     /* This routine will ONLY return MPIAIJ type matrix */
2847     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
2848     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2849     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2850     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2851     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2852 
2853   } else if (scall == MAT_REUSE_MATRIX){
2854     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2855   } else {
2856     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2857   }
2858 
2859   for (i=0;i<m;i++) {
2860     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2861     I    = i + rstart;
2862     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2863     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2864   }
2865   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2866   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2867   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2868 
2869   PetscFunctionReturn(0);
2870 }
2871 
2872 #undef __FUNCT__
2873 #define __FUNCT__ "MatFileSplit"
2874 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2875 {
2876   PetscErrorCode    ierr;
2877   PetscMPIInt       rank;
2878   PetscInt          m,N,i,rstart,nnz;
2879   size_t            len;
2880   const PetscInt    *indx;
2881   PetscViewer       out;
2882   char              *name;
2883   Mat               B;
2884   const PetscScalar *values;
2885 
2886   PetscFunctionBegin;
2887   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2888   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2889   /* Should this be the type of the diagonal block of A? */
2890   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2891   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
2892   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2893   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2894   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2895   for (i=0;i<m;i++) {
2896     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2897     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2898     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2899   }
2900   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2901   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2902 
2903   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2904   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2905   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2906   sprintf(name,"%s.%d",outfile,rank);
2907   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
2908   ierr = PetscFree(name);
2909   ierr = MatView(B,out);CHKERRQ(ierr);
2910   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2911   ierr = MatDestroy(B);CHKERRQ(ierr);
2912   PetscFunctionReturn(0);
2913 }
2914 
2915 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2916 #undef __FUNCT__
2917 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2918 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2919 {
2920   PetscErrorCode       ierr;
2921   Mat_Merge_SeqsToMPI  *merge;
2922   PetscObjectContainer container;
2923 
2924   PetscFunctionBegin;
2925   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2926   if (container) {
2927     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2928     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2929     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2930     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2931     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2932     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2933     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2934     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2935     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
2936     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
2937     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
2938     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
2939 
2940     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2941     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2942   }
2943   ierr = PetscFree(merge);CHKERRQ(ierr);
2944 
2945   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2946   PetscFunctionReturn(0);
2947 }
2948 
2949 #include "src/mat/utils/freespace.h"
2950 #include "petscbt.h"
2951 static PetscEvent logkey_seqstompinum = 0;
2952 #undef __FUNCT__
2953 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
2954 /*@C
2955       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
2956                  matrices from each processor
2957 
2958     Collective on MPI_Comm
2959 
2960    Input Parameters:
2961 +    comm - the communicators the parallel matrix will live on
2962 .    seqmat - the input sequential matrices
2963 .    m - number of local rows (or PETSC_DECIDE)
2964 .    n - number of local columns (or PETSC_DECIDE)
2965 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2966 
2967    Output Parameter:
2968 .    mpimat - the parallel matrix generated
2969 
2970     Level: advanced
2971 
2972    Notes:
2973      The dimensions of the sequential matrix in each processor MUST be the same.
2974      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
2975      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
2976 @*/
2977 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2978 {
2979   PetscErrorCode       ierr;
2980   MPI_Comm             comm=mpimat->comm;
2981   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2982   PetscMPIInt          size,rank,taga,*len_s;
2983   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
2984   PetscInt             proc,m;
2985   PetscInt             **buf_ri,**buf_rj;
2986   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2987   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2988   MPI_Request          *s_waits,*r_waits;
2989   MPI_Status           *status;
2990   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2991   Mat_Merge_SeqsToMPI  *merge;
2992   PetscObjectContainer container;
2993 
2994   PetscFunctionBegin;
2995   if (!logkey_seqstompinum) {
2996     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);
2997   }
2998   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2999 
3000   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3001   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3002 
3003   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3004   if (container) {
3005     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3006   }
3007   bi     = merge->bi;
3008   bj     = merge->bj;
3009   buf_ri = merge->buf_ri;
3010   buf_rj = merge->buf_rj;
3011 
3012   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3013   owners = merge->rowmap.range;
3014   len_s  = merge->len_s;
3015 
3016   /* send and recv matrix values */
3017   /*-----------------------------*/
3018   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3019   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3020 
3021   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3022   for (proc=0,k=0; proc<size; proc++){
3023     if (!len_s[proc]) continue;
3024     i = owners[proc];
3025     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3026     k++;
3027   }
3028 
3029   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3030   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3031   ierr = PetscFree(status);CHKERRQ(ierr);
3032 
3033   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3034   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3035 
3036   /* insert mat values of mpimat */
3037   /*----------------------------*/
3038   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3039   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3040   nextrow = buf_ri_k + merge->nrecv;
3041   nextai  = nextrow + merge->nrecv;
3042 
3043   for (k=0; k<merge->nrecv; k++){
3044     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3045     nrows = *(buf_ri_k[k]);
3046     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
3047     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
3048   }
3049 
3050   /* set values of ba */
3051   m = merge->rowmap.n;
3052   for (i=0; i<m; i++) {
3053     arow = owners[rank] + i;
3054     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3055     bnzi = bi[i+1] - bi[i];
3056     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3057 
3058     /* add local non-zero vals of this proc's seqmat into ba */
3059     anzi = ai[arow+1] - ai[arow];
3060     aj   = a->j + ai[arow];
3061     aa   = a->a + ai[arow];
3062     nextaj = 0;
3063     for (j=0; nextaj<anzi; j++){
3064       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3065         ba_i[j] += aa[nextaj++];
3066       }
3067     }
3068 
3069     /* add received vals into ba */
3070     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3071       /* i-th row */
3072       if (i == *nextrow[k]) {
3073         anzi = *(nextai[k]+1) - *nextai[k];
3074         aj   = buf_rj[k] + *(nextai[k]);
3075         aa   = abuf_r[k] + *(nextai[k]);
3076         nextaj = 0;
3077         for (j=0; nextaj<anzi; j++){
3078           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3079             ba_i[j] += aa[nextaj++];
3080           }
3081         }
3082         nextrow[k]++; nextai[k]++;
3083       }
3084     }
3085     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3086   }
3087   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3088   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3089 
3090   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3091   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3092   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3093   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3094   PetscFunctionReturn(0);
3095 }
3096 
3097 static PetscEvent logkey_seqstompisym = 0;
3098 #undef __FUNCT__
3099 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3100 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3101 {
3102   PetscErrorCode       ierr;
3103   Mat                  B_mpi;
3104   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3105   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3106   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3107   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3108   PetscInt             len,proc,*dnz,*onz;
3109   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3110   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3111   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3112   MPI_Status           *status;
3113   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3114   PetscBT              lnkbt;
3115   Mat_Merge_SeqsToMPI  *merge;
3116   PetscObjectContainer container;
3117 
3118   PetscFunctionBegin;
3119   if (!logkey_seqstompisym) {
3120     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);
3121   }
3122   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3123 
3124   /* make sure it is a PETSc comm */
3125   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3126   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3127   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3128 
3129   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3130   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3131 
3132   /* determine row ownership */
3133   /*---------------------------------------------------------*/
3134   merge->rowmap.n = m;
3135   merge->rowmap.N = M;
3136   merge->rowmap.bs = 1;
3137   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
3138   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3139   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3140 
3141   m      = merge->rowmap.n;
3142   M      = merge->rowmap.N;
3143   owners = merge->rowmap.range;
3144 
3145   /* determine the number of messages to send, their lengths */
3146   /*---------------------------------------------------------*/
3147   len_s  = merge->len_s;
3148 
3149   len = 0;  /* length of buf_si[] */
3150   merge->nsend = 0;
3151   for (proc=0; proc<size; proc++){
3152     len_si[proc] = 0;
3153     if (proc == rank){
3154       len_s[proc] = 0;
3155     } else {
3156       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3157       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3158     }
3159     if (len_s[proc]) {
3160       merge->nsend++;
3161       nrows = 0;
3162       for (i=owners[proc]; i<owners[proc+1]; i++){
3163         if (ai[i+1] > ai[i]) nrows++;
3164       }
3165       len_si[proc] = 2*(nrows+1);
3166       len += len_si[proc];
3167     }
3168   }
3169 
3170   /* determine the number and length of messages to receive for ij-structure */
3171   /*-------------------------------------------------------------------------*/
3172   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3173   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3174 
3175   /* post the Irecv of j-structure */
3176   /*-------------------------------*/
3177   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
3178   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3179 
3180   /* post the Isend of j-structure */
3181   /*--------------------------------*/
3182   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3183   sj_waits = si_waits + merge->nsend;
3184 
3185   for (proc=0, k=0; proc<size; proc++){
3186     if (!len_s[proc]) continue;
3187     i = owners[proc];
3188     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3189     k++;
3190   }
3191 
3192   /* receives and sends of j-structure are complete */
3193   /*------------------------------------------------*/
3194   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3195   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3196 
3197   /* send and recv i-structure */
3198   /*---------------------------*/
3199   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
3200   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3201 
3202   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3203   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3204   for (proc=0,k=0; proc<size; proc++){
3205     if (!len_s[proc]) continue;
3206     /* form outgoing message for i-structure:
3207          buf_si[0]:                 nrows to be sent
3208                [1:nrows]:           row index (global)
3209                [nrows+1:2*nrows+1]: i-structure index
3210     */
3211     /*-------------------------------------------*/
3212     nrows = len_si[proc]/2 - 1;
3213     buf_si_i    = buf_si + nrows+1;
3214     buf_si[0]   = nrows;
3215     buf_si_i[0] = 0;
3216     nrows = 0;
3217     for (i=owners[proc]; i<owners[proc+1]; i++){
3218       anzi = ai[i+1] - ai[i];
3219       if (anzi) {
3220         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3221         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3222         nrows++;
3223       }
3224     }
3225     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3226     k++;
3227     buf_si += len_si[proc];
3228   }
3229 
3230   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3231   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3232 
3233   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3234   for (i=0; i<merge->nrecv; i++){
3235     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3236   }
3237 
3238   ierr = PetscFree(len_si);CHKERRQ(ierr);
3239   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3240   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3241   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3242   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3243   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3244   ierr = PetscFree(status);CHKERRQ(ierr);
3245 
3246   /* compute a local seq matrix in each processor */
3247   /*----------------------------------------------*/
3248   /* allocate bi array and free space for accumulating nonzero column info */
3249   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3250   bi[0] = 0;
3251 
3252   /* create and initialize a linked list */
3253   nlnk = N+1;
3254   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3255 
3256   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3257   len = 0;
3258   len  = ai[owners[rank+1]] - ai[owners[rank]];
3259   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3260   current_space = free_space;
3261 
3262   /* determine symbolic info for each local row */
3263   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3264   nextrow = buf_ri_k + merge->nrecv;
3265   nextai  = nextrow + merge->nrecv;
3266   for (k=0; k<merge->nrecv; k++){
3267     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3268     nrows = *buf_ri_k[k];
3269     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
3270     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
3271   }
3272 
3273   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3274   len = 0;
3275   for (i=0;i<m;i++) {
3276     bnzi   = 0;
3277     /* add local non-zero cols of this proc's seqmat into lnk */
3278     arow   = owners[rank] + i;
3279     anzi   = ai[arow+1] - ai[arow];
3280     aj     = a->j + ai[arow];
3281     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3282     bnzi += nlnk;
3283     /* add received col data into lnk */
3284     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3285       if (i == *nextrow[k]) { /* i-th row */
3286         anzi = *(nextai[k]+1) - *nextai[k];
3287         aj   = buf_rj[k] + *nextai[k];
3288         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3289         bnzi += nlnk;
3290         nextrow[k]++; nextai[k]++;
3291       }
3292     }
3293     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3294 
3295     /* if free space is not available, make more free space */
3296     if (current_space->local_remaining<bnzi) {
3297       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3298       nspacedouble++;
3299     }
3300     /* copy data into free space, then initialize lnk */
3301     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3302     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3303 
3304     current_space->array           += bnzi;
3305     current_space->local_used      += bnzi;
3306     current_space->local_remaining -= bnzi;
3307 
3308     bi[i+1] = bi[i] + bnzi;
3309   }
3310 
3311   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3312 
3313   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3314   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3315   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3316 
3317   /* create symbolic parallel matrix B_mpi */
3318   /*---------------------------------------*/
3319   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3320   if (n==PETSC_DECIDE) {
3321     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3322   } else {
3323     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3324   }
3325   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3326   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3327   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3328 
3329   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3330   B_mpi->assembled     = PETSC_FALSE;
3331   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3332   merge->bi            = bi;
3333   merge->bj            = bj;
3334   merge->buf_ri        = buf_ri;
3335   merge->buf_rj        = buf_rj;
3336   merge->coi           = PETSC_NULL;
3337   merge->coj           = PETSC_NULL;
3338   merge->owners_co     = PETSC_NULL;
3339 
3340   /* attach the supporting struct to B_mpi for reuse */
3341   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3342   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3343   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3344   *mpimat = B_mpi;
3345 
3346   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3347   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3348   PetscFunctionReturn(0);
3349 }
3350 
3351 static PetscEvent logkey_seqstompi = 0;
3352 #undef __FUNCT__
3353 #define __FUNCT__ "MatMerge_SeqsToMPI"
3354 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3355 {
3356   PetscErrorCode   ierr;
3357 
3358   PetscFunctionBegin;
3359   if (!logkey_seqstompi) {
3360     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);
3361   }
3362   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3363   if (scall == MAT_INITIAL_MATRIX){
3364     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3365   }
3366   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3367   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3368   PetscFunctionReturn(0);
3369 }
3370 static PetscEvent logkey_getlocalmat = 0;
3371 #undef __FUNCT__
3372 #define __FUNCT__ "MatGetLocalMat"
3373 /*@C
3374      MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows
3375 
3376     Not Collective
3377 
3378    Input Parameters:
3379 +    A - the matrix
3380 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3381 
3382    Output Parameter:
3383 .    A_loc - the local sequential matrix generated
3384 
3385     Level: developer
3386 
3387 @*/
3388 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3389 {
3390   PetscErrorCode  ierr;
3391   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3392   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3393   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3394   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3395   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
3396   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3397 
3398   PetscFunctionBegin;
3399   if (!logkey_getlocalmat) {
3400     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);
3401   }
3402   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3403   if (scall == MAT_INITIAL_MATRIX){
3404     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3405     ci[0] = 0;
3406     for (i=0; i<am; i++){
3407       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3408     }
3409     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3410     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3411     k = 0;
3412     for (i=0; i<am; i++) {
3413       ncols_o = bi[i+1] - bi[i];
3414       ncols_d = ai[i+1] - ai[i];
3415       /* off-diagonal portion of A */
3416       for (jo=0; jo<ncols_o; jo++) {
3417         col = cmap[*bj];
3418         if (col >= cstart) break;
3419         cj[k]   = col; bj++;
3420         ca[k++] = *ba++;
3421       }
3422       /* diagonal portion of A */
3423       for (j=0; j<ncols_d; j++) {
3424         cj[k]   = cstart + *aj++;
3425         ca[k++] = *aa++;
3426       }
3427       /* off-diagonal portion of A */
3428       for (j=jo; j<ncols_o; j++) {
3429         cj[k]   = cmap[*bj++];
3430         ca[k++] = *ba++;
3431       }
3432     }
3433     /* put together the new matrix */
3434     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3435     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3436     /* Since these are PETSc arrays, change flags to free them as necessary. */
3437     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3438     mat->freedata = PETSC_TRUE;
3439     mat->nonew    = 0;
3440   } else if (scall == MAT_REUSE_MATRIX){
3441     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3442     ci = mat->i; cj = mat->j; ca = mat->a;
3443     for (i=0; i<am; i++) {
3444       /* off-diagonal portion of A */
3445       ncols_o = bi[i+1] - bi[i];
3446       for (jo=0; jo<ncols_o; jo++) {
3447         col = cmap[*bj];
3448         if (col >= cstart) break;
3449         *ca++ = *ba++; bj++;
3450       }
3451       /* diagonal portion of A */
3452       ncols_d = ai[i+1] - ai[i];
3453       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3454       /* off-diagonal portion of A */
3455       for (j=jo; j<ncols_o; j++) {
3456         *ca++ = *ba++; bj++;
3457       }
3458     }
3459   } else {
3460     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3461   }
3462 
3463   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3464   PetscFunctionReturn(0);
3465 }
3466 
3467 static PetscEvent logkey_getlocalmatcondensed = 0;
3468 #undef __FUNCT__
3469 #define __FUNCT__ "MatGetLocalMatCondensed"
3470 /*@C
3471      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
3472 
3473     Not Collective
3474 
3475    Input Parameters:
3476 +    A - the matrix
3477 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3478 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3479 
3480    Output Parameter:
3481 .    A_loc - the local sequential matrix generated
3482 
3483     Level: developer
3484 
3485 @*/
3486 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3487 {
3488   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3489   PetscErrorCode    ierr;
3490   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3491   IS                isrowa,iscola;
3492   Mat               *aloc;
3493 
3494   PetscFunctionBegin;
3495   if (!logkey_getlocalmatcondensed) {
3496     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);
3497   }
3498   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3499   if (!row){
3500     start = A->rmap.rstart; end = A->rmap.rend;
3501     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3502   } else {
3503     isrowa = *row;
3504   }
3505   if (!col){
3506     start = A->cmap.rstart;
3507     cmap  = a->garray;
3508     nzA   = a->A->cmap.n;
3509     nzB   = a->B->cmap.n;
3510     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3511     ncols = 0;
3512     for (i=0; i<nzB; i++) {
3513       if (cmap[i] < start) idx[ncols++] = cmap[i];
3514       else break;
3515     }
3516     imark = i;
3517     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3518     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3519     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3520     ierr = PetscFree(idx);CHKERRQ(ierr);
3521   } else {
3522     iscola = *col;
3523   }
3524   if (scall != MAT_INITIAL_MATRIX){
3525     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3526     aloc[0] = *A_loc;
3527   }
3528   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3529   *A_loc = aloc[0];
3530   ierr = PetscFree(aloc);CHKERRQ(ierr);
3531   if (!row){
3532     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3533   }
3534   if (!col){
3535     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3536   }
3537   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3538   PetscFunctionReturn(0);
3539 }
3540 
3541 static PetscEvent logkey_GetBrowsOfAcols = 0;
3542 #undef __FUNCT__
3543 #define __FUNCT__ "MatGetBrowsOfAcols"
3544 /*@C
3545     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
3546 
3547     Collective on Mat
3548 
3549    Input Parameters:
3550 +    A,B - the matrices in mpiaij format
3551 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3552 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3553 
3554    Output Parameter:
3555 +    rowb, colb - index sets of rows and columns of B to extract
3556 .    brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows
3557 -    B_seq - the sequential matrix generated
3558 
3559     Level: developer
3560 
3561 @*/
3562 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3563 {
3564   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3565   PetscErrorCode    ierr;
3566   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3567   IS                isrowb,iscolb;
3568   Mat               *bseq;
3569 
3570   PetscFunctionBegin;
3571   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
3572     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
3573   }
3574   if (!logkey_GetBrowsOfAcols) {
3575     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);
3576   }
3577   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3578 
3579   if (scall == MAT_INITIAL_MATRIX){
3580     start = A->cmap.rstart;
3581     cmap  = a->garray;
3582     nzA   = a->A->cmap.n;
3583     nzB   = a->B->cmap.n;
3584     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3585     ncols = 0;
3586     for (i=0; i<nzB; i++) {  /* row < local row index */
3587       if (cmap[i] < start) idx[ncols++] = cmap[i];
3588       else break;
3589     }
3590     imark = i;
3591     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3592     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3593     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3594     ierr = PetscFree(idx);CHKERRQ(ierr);
3595     *brstart = imark;
3596     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
3597   } else {
3598     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3599     isrowb = *rowb; iscolb = *colb;
3600     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3601     bseq[0] = *B_seq;
3602   }
3603   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3604   *B_seq = bseq[0];
3605   ierr = PetscFree(bseq);CHKERRQ(ierr);
3606   if (!rowb){
3607     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3608   } else {
3609     *rowb = isrowb;
3610   }
3611   if (!colb){
3612     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3613   } else {
3614     *colb = iscolb;
3615   }
3616   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3617   PetscFunctionReturn(0);
3618 }
3619 
3620 static PetscEvent logkey_GetBrowsOfAocols = 0;
3621 #undef __FUNCT__
3622 #define __FUNCT__ "MatGetBrowsOfAoCols"
3623 /*@C
3624     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
3625     of the OFF-DIAGONAL portion of local A
3626 
3627     Collective on Mat
3628 
3629    Input Parameters:
3630 +    A,B - the matrices in mpiaij format
3631 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3632 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3633 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3634 
3635    Output Parameter:
3636 +    B_oth - the sequential matrix generated
3637 
3638     Level: developer
3639 
3640 @*/
3641 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3642 {
3643   VecScatter_MPI_General *gen_to,*gen_from;
3644   PetscErrorCode         ierr;
3645   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
3646   Mat_SeqAIJ             *b_oth;
3647   VecScatter             ctx=a->Mvctx;
3648   MPI_Comm               comm=ctx->comm;
3649   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3650   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
3651   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3652   PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3653   MPI_Request            *rwaits,*swaits;
3654   MPI_Status             *sstatus,rstatus;
3655   PetscInt               *cols;
3656   PetscScalar            *vals;
3657   PetscMPIInt            j;
3658 
3659   PetscFunctionBegin;
3660   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
3661     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
3662   }
3663   if (!logkey_GetBrowsOfAocols) {
3664     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);
3665   }
3666   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3667   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3668 
3669   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3670   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3671   rvalues  = gen_from->values; /* holds the length of sending row */
3672   svalues  = gen_to->values;   /* holds the length of receiving row */
3673   nrecvs   = gen_from->n;
3674   nsends   = gen_to->n;
3675   rwaits   = gen_from->requests;
3676   swaits   = gen_to->requests;
3677   srow     = gen_to->indices;   /* local row index to be sent */
3678   rstarts  = gen_from->starts;
3679   sstarts  = gen_to->starts;
3680   rprocs   = gen_from->procs;
3681   sprocs   = gen_to->procs;
3682   sstatus  = gen_to->sstatus;
3683 
3684   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3685   if (scall == MAT_INITIAL_MATRIX){
3686     /* i-array */
3687     /*---------*/
3688     /*  post receives */
3689     for (i=0; i<nrecvs; i++){
3690       rowlen = (PetscInt*)rvalues + rstarts[i];
3691       nrows = rstarts[i+1]-rstarts[i];
3692       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3693     }
3694 
3695     /* pack the outgoing message */
3696     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3697     rstartsj = sstartsj + nsends +1;
3698     sstartsj[0] = 0;  rstartsj[0] = 0;
3699     len = 0; /* total length of j or a array to be sent */
3700     k = 0;
3701     for (i=0; i<nsends; i++){
3702       rowlen = (PetscInt*)svalues + sstarts[i];
3703       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3704       for (j=0; j<nrows; j++) {
3705         row = srow[k] + B->rmap.range[rank]; /* global row idx */
3706         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3707         len += rowlen[j];
3708         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3709         k++;
3710       }
3711       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3712        sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3713     }
3714     /* recvs and sends of i-array are completed */
3715     i = nrecvs;
3716     while (i--) {
3717       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3718     }
3719     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3720     /* allocate buffers for sending j and a arrays */
3721     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3722     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3723 
3724     /* create i-array of B_oth */
3725     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3726     b_othi[0] = 0;
3727     len = 0; /* total length of j or a array to be received */
3728     k = 0;
3729     for (i=0; i<nrecvs; i++){
3730       rowlen = (PetscInt*)rvalues + rstarts[i];
3731       nrows = rstarts[i+1]-rstarts[i];
3732       for (j=0; j<nrows; j++) {
3733         b_othi[k+1] = b_othi[k] + rowlen[j];
3734         len += rowlen[j]; k++;
3735       }
3736       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3737     }
3738 
3739     /* allocate space for j and a arrrays of B_oth */
3740     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3741     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3742 
3743     /* j-array */
3744     /*---------*/
3745     /*  post receives of j-array */
3746     for (i=0; i<nrecvs; i++){
3747       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3748       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3749     }
3750     k = 0;
3751     for (i=0; i<nsends; i++){
3752       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3753       bufJ = bufj+sstartsj[i];
3754       for (j=0; j<nrows; j++) {
3755         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
3756         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3757         for (l=0; l<ncols; l++){
3758           *bufJ++ = cols[l];
3759         }
3760         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3761       }
3762       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3763     }
3764 
3765     /* recvs and sends of j-array are completed */
3766     i = nrecvs;
3767     while (i--) {
3768       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3769     }
3770     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3771   } else if (scall == MAT_REUSE_MATRIX){
3772     sstartsj = *startsj;
3773     rstartsj = sstartsj + nsends +1;
3774     bufa     = *bufa_ptr;
3775     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3776     b_otha   = b_oth->a;
3777   } else {
3778     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
3779   }
3780 
3781   /* a-array */
3782   /*---------*/
3783   /*  post receives of a-array */
3784   for (i=0; i<nrecvs; i++){
3785     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3786     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3787   }
3788   k = 0;
3789   for (i=0; i<nsends; i++){
3790     nrows = sstarts[i+1]-sstarts[i];
3791     bufA = bufa+sstartsj[i];
3792     for (j=0; j<nrows; j++) {
3793       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
3794       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3795       for (l=0; l<ncols; l++){
3796         *bufA++ = vals[l];
3797       }
3798       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3799 
3800     }
3801     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3802   }
3803   /* recvs and sends of a-array are completed */
3804   i = nrecvs;
3805   while (i--) {
3806     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3807   }
3808    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3809 
3810   if (scall == MAT_INITIAL_MATRIX){
3811     /* put together the new matrix */
3812     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3813 
3814     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3815     /* Since these are PETSc arrays, change flags to free them as necessary. */
3816     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3817     b_oth->freedata = PETSC_TRUE;
3818     b_oth->nonew    = 0;
3819 
3820     ierr = PetscFree(bufj);CHKERRQ(ierr);
3821     if (!startsj || !bufa_ptr){
3822       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3823       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
3824     } else {
3825       *startsj  = sstartsj;
3826       *bufa_ptr = bufa;
3827     }
3828   }
3829   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3830 
3831   PetscFunctionReturn(0);
3832 }
3833 
3834 #undef __FUNCT__
3835 #define __FUNCT__ "MatGetCommunicationStructs"
3836 /*@C
3837   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
3838 
3839   Not Collective
3840 
3841   Input Parameters:
3842 . A - The matrix in mpiaij format
3843 
3844   Output Parameter:
3845 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
3846 . colmap - A map from global column index to local index into lvec
3847 - multScatter - A scatter from the argument of a matrix-vector product to lvec
3848 
3849   Level: developer
3850 
3851 @*/
3852 #if defined (PETSC_USE_CTABLE)
3853 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
3854 #else
3855 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
3856 #endif
3857 {
3858   Mat_MPIAIJ *a;
3859 
3860   PetscFunctionBegin;
3861   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
3862   PetscValidPointer(lvec, 2)
3863   PetscValidPointer(colmap, 3)
3864   PetscValidPointer(multScatter, 4)
3865   a = (Mat_MPIAIJ *) A->data;
3866   if (lvec) *lvec = a->lvec;
3867   if (colmap) *colmap = a->colmap;
3868   if (multScatter) *multScatter = a->Mvctx;
3869   PetscFunctionReturn(0);
3870 }
3871 
3872 /*MC
3873    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3874 
3875    Options Database Keys:
3876 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3877 
3878   Level: beginner
3879 
3880 .seealso: MatCreateMPIAIJ
3881 M*/
3882 
3883 EXTERN_C_BEGIN
3884 #undef __FUNCT__
3885 #define __FUNCT__ "MatCreate_MPIAIJ"
3886 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
3887 {
3888   Mat_MPIAIJ     *b;
3889   PetscErrorCode ierr;
3890   PetscMPIInt    size;
3891 
3892   PetscFunctionBegin;
3893   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3894 
3895   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3896   B->data         = (void*)b;
3897   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3898   B->factor       = 0;
3899   B->rmap.bs      = 1;
3900   B->assembled    = PETSC_FALSE;
3901   B->mapping      = 0;
3902 
3903   B->insertmode      = NOT_SET_VALUES;
3904   b->size            = size;
3905   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3906 
3907   /* build cache for off array entries formed */
3908   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3909   b->donotstash  = PETSC_FALSE;
3910   b->colmap      = 0;
3911   b->garray      = 0;
3912   b->roworiented = PETSC_TRUE;
3913 
3914   /* stuff used for matrix vector multiply */
3915   b->lvec      = PETSC_NULL;
3916   b->Mvctx     = PETSC_NULL;
3917 
3918   /* stuff for MatGetRow() */
3919   b->rowindices   = 0;
3920   b->rowvalues    = 0;
3921   b->getrowactive = PETSC_FALSE;
3922 
3923 
3924   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3925                                      "MatStoreValues_MPIAIJ",
3926                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3927   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3928                                      "MatRetrieveValues_MPIAIJ",
3929                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3930   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3931 				     "MatGetDiagonalBlock_MPIAIJ",
3932                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3933   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3934 				     "MatIsTranspose_MPIAIJ",
3935 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3936   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3937 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3938 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3939   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3940 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3941 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
3942   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3943 				     "MatDiagonalScaleLocal_MPIAIJ",
3944 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
3945   PetscFunctionReturn(0);
3946 }
3947 EXTERN_C_END
3948 
3949 /*
3950     Special version for direct calls from Fortran
3951 */
3952 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3953 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
3954 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3955 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
3956 #endif
3957 
3958 /* Change these macros so can be used in void function */
3959 #undef CHKERRQ
3960 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
3961 #undef SETERRQ2
3962 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
3963 #undef SETERRQ
3964 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)
3965 
3966 EXTERN_C_BEGIN
3967 #undef __FUNCT__
3968 #define __FUNCT__ "matsetvaluesmpiaij_"
3969 void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv)
3970 {
3971   Mat            mat = *mmat;
3972   PetscInt       m = *mm, n = *mn;
3973   InsertMode     addv = *maddv;
3974   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3975   PetscScalar    value;
3976   PetscErrorCode ierr;
3977 
3978   MatPreallocated(mat);
3979   if (mat->insertmode == NOT_SET_VALUES) {
3980     mat->insertmode = addv;
3981   }
3982 #if defined(PETSC_USE_DEBUG)
3983   else if (mat->insertmode != addv) {
3984     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3985   }
3986 #endif
3987   {
3988   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
3989   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
3990   PetscTruth     roworiented = aij->roworiented;
3991 
3992   /* Some Variables required in the macro */
3993   Mat            A = aij->A;
3994   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
3995   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
3996   PetscScalar    *aa = a->a;
3997   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
3998   Mat            B = aij->B;
3999   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
4000   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
4001   PetscScalar    *ba = b->a;
4002 
4003   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4004   PetscInt       nonew = a->nonew;
4005   PetscScalar    *ap1,*ap2;
4006 
4007   PetscFunctionBegin;
4008   for (i=0; i<m; i++) {
4009     if (im[i] < 0) continue;
4010 #if defined(PETSC_USE_DEBUG)
4011     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
4012 #endif
4013     if (im[i] >= rstart && im[i] < rend) {
4014       row      = im[i] - rstart;
4015       lastcol1 = -1;
4016       rp1      = aj + ai[row];
4017       ap1      = aa + ai[row];
4018       rmax1    = aimax[row];
4019       nrow1    = ailen[row];
4020       low1     = 0;
4021       high1    = nrow1;
4022       lastcol2 = -1;
4023       rp2      = bj + bi[row];
4024       ap2      = ba + bi[row];
4025       rmax2    = bimax[row];
4026       nrow2    = bilen[row];
4027       low2     = 0;
4028       high2    = nrow2;
4029 
4030       for (j=0; j<n; j++) {
4031         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4032         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4033         if (in[j] >= cstart && in[j] < cend){
4034           col = in[j] - cstart;
4035           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4036         } else if (in[j] < 0) continue;
4037 #if defined(PETSC_USE_DEBUG)
4038         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
4039 #endif
4040         else {
4041           if (mat->was_assembled) {
4042             if (!aij->colmap) {
4043               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
4044             }
4045 #if defined (PETSC_USE_CTABLE)
4046             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4047 	    col--;
4048 #else
4049             col = aij->colmap[in[j]] - 1;
4050 #endif
4051             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4052               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
4053               col =  in[j];
4054               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4055               B = aij->B;
4056               b = (Mat_SeqAIJ*)B->data;
4057               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4058               rp2      = bj + bi[row];
4059               ap2      = ba + bi[row];
4060               rmax2    = bimax[row];
4061               nrow2    = bilen[row];
4062               low2     = 0;
4063               high2    = nrow2;
4064               bm       = aij->B->rmap.n;
4065               ba = b->a;
4066             }
4067           } else col = in[j];
4068           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4069         }
4070       }
4071     } else {
4072       if (!aij->donotstash) {
4073         if (roworiented) {
4074           if (ignorezeroentries && v[i*n] == 0.0) continue;
4075           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
4076         } else {
4077           if (ignorezeroentries && v[i] == 0.0) continue;
4078           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4079         }
4080       }
4081     }
4082   }}
4083   PetscFunctionReturnVoid();
4084 }
4085 EXTERN_C_END
4086