xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 906ed7cc33fecbafab01746eec64dcdcc8a4842f)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
10 a slightly higher hash-table lookup cost; without it, it is not scalable (each processor
11 holds an order-N integer array) but access is fast.
12 */
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->cmap.n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
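
/*
   Illustrative sketch (guarded out of compilation; the helper name is ours, purely for
   illustration): how the colmap built above is typically consulted, mirroring the lookups
   in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below.  Given a global column number
   gcol owned by another process, it returns (in *lcol) the column index within the
   off-diagonal block B, or -1 if that column has no entries on this process.
*/
#if 0
static PetscErrorCode ExampleColmapLookup(Mat_MPIAIJ *aij,PetscInt gcol,PetscInt *lcol)
{
#if defined (PETSC_USE_CTABLE)
  PetscErrorCode ierr;
  ierr  = PetscTableFind(aij->colmap,gcol+1,lcol);CHKERRQ(ierr);
  (*lcol)--;                      /* stored shifted by one so that 0 can mean "not present" */
#else
  *lcol = aij->colmap[gcol] - 1;  /* dense array over all global columns, as allocated above */
#endif
  return 0;
}
#endif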
35 
36 
37 #define CHUNKSIZE   15
38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
39 { \
40     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
41     lastcol1 = col;\
42     while (high1-low1 > 5) { \
43       t = (low1+high1)/2; \
44       if (rp1[t] > col) high1 = t; \
45       else             low1  = t; \
46     } \
47       for (_i=low1; _i<high1; _i++) { \
48         if (rp1[_i] > col) break; \
49         if (rp1[_i] == col) { \
50           if (addv == ADD_VALUES) ap1[_i] += value;   \
51           else                    ap1[_i] = value; \
52           goto a_noinsert; \
53         } \
54       }  \
55       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
56       if (nonew == 1) goto a_noinsert; \
57       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew); \
59       N = nrow1++ - 1; a->nz++; high1++; \
60       /* shift up all the later entries in this row */ \
61       for (ii=N; ii>=_i; ii--) { \
62         rp1[ii+1] = rp1[ii]; \
63         ap1[ii+1] = ap1[ii]; \
64       } \
65       rp1[_i] = col;  \
66       ap1[_i] = value;  \
67       a_noinsert: ; \
68       ailen[row] = nrow1; \
69 }
70 
71 
72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73 { \
74     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
75     lastcol2 = col;\
76     while (high2-low2 > 5) { \
77       t = (low2+high2)/2; \
78       if (rp2[t] > col) high2 = t; \
79       else             low2  = t; \
80     } \
81        for (_i=low2; _i<high2; _i++) { \
82         if (rp2[_i] > col) break; \
83         if (rp2[_i] == col) { \
84           if (addv == ADD_VALUES) ap2[_i] += value;   \
85           else                    ap2[_i] = value; \
86           goto b_noinsert; \
87         } \
88       }  \
89       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
90       if (nonew == 1) goto b_noinsert; \
91       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92       MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew); \
93       N = nrow2++ - 1; b->nz++; high2++;\
94       /* shift up all the later entries in this row */ \
95       for (ii=N; ii>=_i; ii--) { \
96         rp2[ii+1] = rp2[ii]; \
97         ap2[ii+1] = ap2[ii]; \
98       } \
99       rp2[_i] = col;  \
100       ap2[_i] = value;  \
101       b_noinsert: ; \
102       bilen[row] = nrow2; \
103 }
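
/*
   Illustrative sketch (guarded out of compilation, names local to this example): the core
   insertion logic that the two macros above implement for one row of a SeqAIJ block.
   rp[] holds the sorted column indices of the row, ap[] the values, and *nrow the current
   number of stored entries.  The real macros additionally honor the nonew and
   ignorezeroentries options and grow the row via MatSeqXAIJReallocateAIJ().
*/
#if 0
static void ExampleInsertIntoSortedRow(PetscInt *rp,PetscScalar *ap,PetscInt *nrow,
                                        PetscInt col,PetscScalar value,InsertMode addv)
{
  PetscInt low = 0,high = *nrow,t,i,ii;

  while (high-low > 5) {              /* binary search narrows the window ... */
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {          /* ... then a short linear scan finishes the search */
    if (rp[i] > col) break;
    if (rp[i] == col) {               /* existing nonzero: add or overwrite */
      if (addv == ADD_VALUES) ap[i] += value;
      else                    ap[i] = value;
      return;
    }
  }
  /* new nonzero: shift the tail of the row up one slot and insert at position i */
  for (ii=*nrow-1; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ap[ii+1] = ap[ii];
  }
  rp[i] = col;
  ap[i] = value;
  (*nrow)++;
}
#endif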
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
108 {
109   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
110   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
111   PetscErrorCode ierr;
112   PetscInt       l,*garray = mat->garray,diag;
113 
114   PetscFunctionBegin;
115   /* code only works for square matrices A */
116 
117   /* find size of row to the left of the diagonal part */
118   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
119   row  = row - diag;
120   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
121     if (garray[b->j[b->i[row]+l]] > diag) break;
122   }
123   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
124 
125   /* diagonal part */
126   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
127 
128   /* right of diagonal part */
129   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
130   PetscFunctionReturn(0);
131 }
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatSetValues_MPIAIJ"
135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
136 {
137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
138   PetscScalar    value;
139   PetscErrorCode ierr;
140   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
141   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
142   PetscTruth     roworiented = aij->roworiented;
143 
144   /* Some variables required in the macros */
145   Mat            A = aij->A;
146   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
147   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
148   PetscScalar    *aa = a->a;
149   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
150   Mat            B = aij->B;
151   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
152   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
153   PetscScalar    *ba = b->a;
154 
155   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
156   PetscInt       nonew = a->nonew;
157   PetscScalar    *ap1,*ap2;
158 
159   PetscFunctionBegin;
160   for (i=0; i<m; i++) {
161     if (im[i] < 0) continue;
162 #if defined(PETSC_USE_DEBUG)
163     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
164 #endif
165     if (im[i] >= rstart && im[i] < rend) {
166       row      = im[i] - rstart;
167       lastcol1 = -1;
168       rp1      = aj + ai[row];
169       ap1      = aa + ai[row];
170       rmax1    = aimax[row];
171       nrow1    = ailen[row];
172       low1     = 0;
173       high1    = nrow1;
174       lastcol2 = -1;
175       rp2      = bj + bi[row];
176       ap2      = ba + bi[row];
177       rmax2    = bimax[row];
178       nrow2    = bilen[row];
179       low2     = 0;
180       high2    = nrow2;
181 
182       for (j=0; j<n; j++) {
183         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
184         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
185         if (in[j] >= cstart && in[j] < cend){
186           col = in[j] - cstart;
187           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
188         } else if (in[j] < 0) continue;
189 #if defined(PETSC_USE_DEBUG)
190         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
191 #endif
192         else {
193           if (mat->was_assembled) {
194             if (!aij->colmap) {
195               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
196             }
197 #if defined (PETSC_USE_CTABLE)
198             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
199 	    col--;
200 #else
201             col = aij->colmap[in[j]] - 1;
202 #endif
203             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
204               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
205               col =  in[j];
206               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
207               B = aij->B;
208               b = (Mat_SeqAIJ*)B->data;
209               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
210               rp2      = bj + bi[row];
211               ap2      = ba + bi[row];
212               rmax2    = bimax[row];
213               nrow2    = bilen[row];
214               low2     = 0;
215               high2    = nrow2;
216               bm       = aij->B->rmap.n;
217               ba = b->a;
218             }
219           } else col = in[j];
220           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
221         }
222       }
223     } else {
224       if (!aij->donotstash) {
225         if (roworiented) {
226           if (ignorezeroentries && v[i*n] == 0.0) continue;
227           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
228         } else {
229           if (ignorezeroentries && v[i] == 0.0) continue;
230           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
231         }
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
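
/*
   Illustrative sketch (guarded out of compilation): typical user-level assembly that is
   serviced by MatSetValues_MPIAIJ() above.  Locally owned rows go directly into the
   diagonal (A) or off-diagonal (B) block; rows owned by other processes are stashed and
   communicated during MatAssemblyBegin/End().  The matrix size and preallocation numbers
   here are arbitrary, and error handling is abbreviated.
*/
#if 0
  Mat            C;
  PetscErrorCode ierr;
  PetscInt       i,rstart,rend,cols[3],N = 100;
  PetscScalar    vals[3] = {-1.0,2.0,-1.0};

  ierr = MatCreate(PETSC_COMM_WORLD,&C);CHKERRQ(ierr);
  ierr = MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(C,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(C,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {                       /* simple 1d Laplacian stencil */
    if (i > 0 && i < N-1) {
      cols[0] = i-1; cols[1] = i; cols[2] = i+1;
      ierr = MatSetValues(C,1,&i,3,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
    } else {
      ierr = MatSetValues(C,1,&i,1,&i,&vals[1],INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#endif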
237 
238 
239 #undef __FUNCT__
240 #define __FUNCT__ "MatGetValues_MPIAIJ"
241 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
242 {
243   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
244   PetscErrorCode ierr;
245   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
246   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
247 
248   PetscFunctionBegin;
249   for (i=0; i<m; i++) {
250     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
251     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
252     if (idxm[i] >= rstart && idxm[i] < rend) {
253       row = idxm[i] - rstart;
254       for (j=0; j<n; j++) {
255         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
256         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
257         if (idxn[j] >= cstart && idxn[j] < cend){
258           col = idxn[j] - cstart;
259           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
260         } else {
261           if (!aij->colmap) {
262             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
263           }
264 #if defined (PETSC_USE_CTABLE)
265           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
266           col --;
267 #else
268           col = aij->colmap[idxn[j]] - 1;
269 #endif
270           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
271           else {
272             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
273           }
274         }
275       }
276     } else {
277       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
278     }
279   }
280   PetscFunctionReturn(0);
281 }
282 
283 #undef __FUNCT__
284 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
285 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
286 {
287   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
288   PetscErrorCode ierr;
289   PetscInt       nstash,reallocs;
290   InsertMode     addv;
291 
292   PetscFunctionBegin;
293   if (aij->donotstash) {
294     PetscFunctionReturn(0);
295   }
296 
297   /* make sure all processors are in the same mode (either INSERT_VALUES or ADD_VALUES) */
298   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
299   if (addv == (ADD_VALUES|INSERT_VALUES)) {
300     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
301   }
302   mat->insertmode = addv; /* in case this processor had no cache */
303 
304   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
305   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
306   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
307   PetscFunctionReturn(0);
308 }
309 
310 #undef __FUNCT__
311 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
312 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
313 {
314   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
315   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
316   PetscErrorCode ierr;
317   PetscMPIInt    n;
318   PetscInt       i,j,rstart,ncols,flg;
319   PetscInt       *row,*col,other_disassembled;
320   PetscScalar    *val;
321   InsertMode     addv = mat->insertmode;
322 
323   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
324   PetscFunctionBegin;
325   if (!aij->donotstash) {
326     while (1) {
327       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
328       if (!flg) break;
329 
330       for (i=0; i<n;) {
331         /* Now identify the consecutive vals belonging to the same row */
332         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
333         if (j < n) ncols = j-i;
334         else       ncols = n-i;
335         /* Now assemble all these values with a single function call */
336         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
337         i = j;
338       }
339     }
340     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
341   }
342   a->compressedrow.use     = PETSC_FALSE;
343   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
344   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
345 
346   /* determine if any processor has disassembled; if so we must
347      also disassemble ourselves, in order that we may reassemble. */
348   /*
349      if nonzero structure of submatrix B cannot change then we know that
350      no processor disassembled thus we can skip this stuff
351   */
352   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
353     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
354     if (mat->was_assembled && !other_disassembled) {
355       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
356     }
357   }
358   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
359     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
360   }
361   ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
362   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
363   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
364   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
365 
366   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
367   aij->rowvalues = 0;
368 
369   /* used by MatAXPY() */
370   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
371   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
372 
373   PetscFunctionReturn(0);
374 }
375 
376 #undef __FUNCT__
377 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
378 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
379 {
380   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
381   PetscErrorCode ierr;
382 
383   PetscFunctionBegin;
384   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
385   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
386   PetscFunctionReturn(0);
387 }
388 
389 #undef __FUNCT__
390 #define __FUNCT__ "MatZeroRows_MPIAIJ"
391 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
392 {
393   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
394   PetscErrorCode ierr;
395   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
396   PetscInt       i,*owners = A->rmap.range;
397   PetscInt       *nprocs,j,idx,nsends,row;
398   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
399   PetscInt       *rvalues,count,base,slen,*source;
400   PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
401   MPI_Comm       comm = A->comm;
402   MPI_Request    *send_waits,*recv_waits;
403   MPI_Status     recv_status,*send_status;
404 #if defined(PETSC_DEBUG)
405   PetscTruth     found = PETSC_FALSE;
406 #endif
407 
408   PetscFunctionBegin;
409   /*  first count number of contributors to each processor */
410   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
411   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
412   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
413   j = 0;
414   for (i=0; i<N; i++) {
415     if (lastidx > (idx = rows[i])) j = 0;
416     lastidx = idx;
417     for (; j<size; j++) {
418       if (idx >= owners[j] && idx < owners[j+1]) {
419         nprocs[2*j]++;
420         nprocs[2*j+1] = 1;
421         owner[i] = j;
422 #if defined(PETSC_DEBUG)
423         found = PETSC_TRUE;
424 #endif
425         break;
426       }
427     }
428 #if defined(PETSC_DEBUG)
429     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
430     found = PETSC_FALSE;
431 #endif
432   }
433   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
434 
435   /* inform other processors of number of messages and max length*/
436   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
437 
438   /* post receives:   */
439   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
440   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
441   for (i=0; i<nrecvs; i++) {
442     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
443   }
444 
445   /* do sends:
446       1) starts[i] gives the starting index in svalues for stuff going to
447          the ith processor
448   */
449   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
450   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
451   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
452   starts[0] = 0;
453   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
454   for (i=0; i<N; i++) {
455     svalues[starts[owner[i]]++] = rows[i];
456   }
457 
458   starts[0] = 0;
459   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
460   count = 0;
461   for (i=0; i<size; i++) {
462     if (nprocs[2*i+1]) {
463       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
464     }
465   }
466   ierr = PetscFree(starts);CHKERRQ(ierr);
467 
468   base = owners[rank];
469 
470   /*  wait on receives */
471   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
472   source = lens + nrecvs;
473   count  = nrecvs; slen = 0;
474   while (count) {
475     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
476     /* unpack receives into our local space */
477     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
478     source[imdex]  = recv_status.MPI_SOURCE;
479     lens[imdex]    = n;
480     slen          += n;
481     count--;
482   }
483   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
484 
485   /* move the received global row indices into local numbering */
486   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
487   count = 0;
488   for (i=0; i<nrecvs; i++) {
489     values = rvalues + i*nmax;
490     for (j=0; j<lens[i]; j++) {
491       lrows[count++] = values[j] - base;
492     }
493   }
494   ierr = PetscFree(rvalues);CHKERRQ(ierr);
495   ierr = PetscFree(lens);CHKERRQ(ierr);
496   ierr = PetscFree(owner);CHKERRQ(ierr);
497   ierr = PetscFree(nprocs);CHKERRQ(ierr);
498 
499   /* actually zap the local rows */
500   /*
501         Zero the required rows. If the "diagonal block" of the matrix
502      is square and the user wishes to set the diagonal we use separate
503      code so that MatSetValues() is not called for each diagonal entry, which would allocate
504      new memory (causing lots of mallocs) and slow things down.
505 
506        Contributed by: Matthew Knepley
507   */
508   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
509   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
510   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
511     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
512   } else if (diag != 0.0) {
513     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
514     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
515       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
516 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
517     }
518     for (i = 0; i < slen; i++) {
519       row  = lrows[i] + rstart;
520       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
521     }
522     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
523     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
524   } else {
525     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
526   }
527   ierr = PetscFree(lrows);CHKERRQ(ierr);
528 
529   /* wait on sends */
530   if (nsends) {
531     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
532     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
533     ierr = PetscFree(send_status);CHKERRQ(ierr);
534   }
535   ierr = PetscFree(send_waits);CHKERRQ(ierr);
536   ierr = PetscFree(svalues);CHKERRQ(ierr);
537 
538   PetscFunctionReturn(0);
539 }
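
/*
   Illustrative sketch (guarded out of compilation): a user-level call that is handled by
   MatZeroRows_MPIAIJ() above.  Any process may list any global rows; the routine routes
   each row index to its owning process before zeroing.  Here two global rows (chosen
   arbitrarily) are zeroed and their diagonal entries set to 1.0, as is common for
   Dirichlet boundary conditions; C is an assembled MPIAIJ matrix.
*/
#if 0
  PetscInt       rows[2] = {0,1};
  PetscErrorCode ierr;

  ierr = MatZeroRows(C,2,rows,1.0);CHKERRQ(ierr);
#endif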
540 
541 #undef __FUNCT__
542 #define __FUNCT__ "MatMult_MPIAIJ"
543 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
544 {
545   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
546   PetscErrorCode ierr;
547   PetscInt       nt;
548 
549   PetscFunctionBegin;
550   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
551   if (nt != A->cmap.n) {
552     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
553   }
554   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
555   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
556   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
557   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
558   PetscFunctionReturn(0);
559 }
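
/*
   Illustrative sketch (guarded out of compilation): applying the parallel product
   implemented above.  Internally yy = (diagonal block)*xx + (off-diagonal block)*lvec,
   where the scatter filling lvec with ghost values overlaps the local product.
   MatGetVecs() is assumed to be available here for obtaining compatibly partitioned
   vectors; C is an assembled MPIAIJ matrix.
*/
#if 0
  Vec            x,y;
  PetscErrorCode ierr;

  ierr = MatGetVecs(C,&x,&y);CHKERRQ(ierr);  /* x matches the column layout, y the row layout */
  ierr = VecSet(x,1.0);CHKERRQ(ierr);
  ierr = MatMult(C,x,y);CHKERRQ(ierr);
  ierr = VecDestroy(x);CHKERRQ(ierr);
  ierr = VecDestroy(y);CHKERRQ(ierr);
#endif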
560 
561 #undef __FUNCT__
562 #define __FUNCT__ "MatMultAdd_MPIAIJ"
563 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
564 {
565   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
566   PetscErrorCode ierr;
567 
568   PetscFunctionBegin;
569   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
570   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
571   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
572   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
573   PetscFunctionReturn(0);
574 }
575 
576 #undef __FUNCT__
577 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
578 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
579 {
580   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
581   PetscErrorCode ierr;
582   PetscTruth     merged;
583 
584   PetscFunctionBegin;
585   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
586   /* do nondiagonal part */
587   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
588   if (!merged) {
589     /* send it on its way */
590     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
591     /* do local part */
592     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
593     /* receive remote parts: note this assumes the values are not actually */
594     /* added into yy until the next line */
595     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
596   } else {
597     /* do local part */
598     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
599     /* send it on its way */
600     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
601     /* values actually were received in the Begin() but we need to call this nop */
602     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
603   }
604   PetscFunctionReturn(0);
605 }
606 
607 EXTERN_C_BEGIN
608 #undef __FUNCT__
609 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
610 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
611 {
612   MPI_Comm       comm;
613   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
614   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
615   IS             Me,Notme;
616   PetscErrorCode ierr;
617   PetscInt       M,N,first,last,*notme,i;
618   PetscMPIInt    size;
619 
620   PetscFunctionBegin;
621 
622   /* Easy test: symmetric diagonal block */
623   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
624   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
625   if (!*f) PetscFunctionReturn(0);
626   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
627   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
628   if (size == 1) PetscFunctionReturn(0);
629 
630   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
631   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
632   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
633   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
634   for (i=0; i<first; i++) notme[i] = i;
635   for (i=last; i<M; i++) notme[i-last+first] = i;
636   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
637   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
638   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
639   Aoff = Aoffs[0];
640   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
641   Boff = Boffs[0];
642   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
643   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
644   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
645   ierr = ISDestroy(Me);CHKERRQ(ierr);
646   ierr = ISDestroy(Notme);CHKERRQ(ierr);
647 
648   PetscFunctionReturn(0);
649 }
650 EXTERN_C_END
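
/*
   Illustrative sketch (guarded out of compilation): querying the test implemented above
   through the public interface; flg is PETSC_TRUE when Bmat equals the transpose of Amat
   to within the given tolerance (chosen arbitrarily here).  Amat and Bmat are assembled
   MPIAIJ matrices.
*/
#if 0
  PetscTruth     flg;
  PetscErrorCode ierr;

  ierr = MatIsTranspose(Amat,Bmat,1.e-10,&flg);CHKERRQ(ierr);
#endif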
651 
652 #undef __FUNCT__
653 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
654 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
655 {
656   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
657   PetscErrorCode ierr;
658 
659   PetscFunctionBegin;
660   /* do nondiagonal part */
661   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
662   /* send it on its way */
663   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
664   /* do local part */
665   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
666   /* receive remote parts */
667   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
668   PetscFunctionReturn(0);
669 }
670 
671 /*
672   This only works correctly for square matrices where the subblock A->A is the
673    diagonal block
674 */
675 #undef __FUNCT__
676 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
677 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
678 {
679   PetscErrorCode ierr;
680   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
681 
682   PetscFunctionBegin;
683   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
684   if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
685     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
686   }
687   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
688   PetscFunctionReturn(0);
689 }
690 
691 #undef __FUNCT__
692 #define __FUNCT__ "MatScale_MPIAIJ"
693 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
694 {
695   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
696   PetscErrorCode ierr;
697 
698   PetscFunctionBegin;
699   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
700   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
701   PetscFunctionReturn(0);
702 }
703 
704 #undef __FUNCT__
705 #define __FUNCT__ "MatDestroy_MPIAIJ"
706 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
707 {
708   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
709   PetscErrorCode ierr;
710 
711   PetscFunctionBegin;
712 #if defined(PETSC_USE_LOG)
713   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
714 #endif
715   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
716   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
717   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
718 #if defined (PETSC_USE_CTABLE)
719   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
720 #else
721   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
722 #endif
723   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
724   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
725   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
726   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
727   ierr = PetscFree(aij);CHKERRQ(ierr);
728 
729   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
730   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
731   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
732   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
733   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
734   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
735   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
736   PetscFunctionReturn(0);
737 }
738 
739 #undef __FUNCT__
740 #define __FUNCT__ "MatView_MPIAIJ_Binary"
741 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
742 {
743   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
744   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
745   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
746   PetscErrorCode    ierr;
747   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
748   int               fd;
749   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
750   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
751   PetscScalar       *column_values;
752 
753   PetscFunctionBegin;
754   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
755   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
756   nz   = A->nz + B->nz;
757   if (!rank) {
758     header[0] = MAT_FILE_COOKIE;
759     header[1] = mat->rmap.N;
760     header[2] = mat->cmap.N;
761     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
762     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
763     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
764     /* get largest number of rows any processor has */
765     rlen = mat->rmap.n;
766     range = mat->rmap.range;
767     for (i=1; i<size; i++) {
768       rlen = PetscMax(rlen,range[i+1] - range[i]);
769     }
770   } else {
771     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
772     rlen = mat->rmap.n;
773   }
774 
775   /* load up the local row counts */
776   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
777   for (i=0; i<mat->rmap.n; i++) {
778     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
779   }
780 
781   /* store the row lengths to the file */
782   if (!rank) {
783     MPI_Status status;
784     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
785     for (i=1; i<size; i++) {
786       rlen = range[i+1] - range[i];
787       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
788       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
789     }
790   } else {
791     ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
792   }
793   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
794 
795   /* load up the local column indices */
796   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
797   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
798   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
799   cnt  = 0;
800   for (i=0; i<mat->rmap.n; i++) {
801     for (j=B->i[i]; j<B->i[i+1]; j++) {
802       if ( (col = garray[B->j[j]]) > cstart) break;
803       column_indices[cnt++] = col;
804     }
805     for (k=A->i[i]; k<A->i[i+1]; k++) {
806       column_indices[cnt++] = A->j[k] + cstart;
807     }
808     for (; j<B->i[i+1]; j++) {
809       column_indices[cnt++] = garray[B->j[j]];
810     }
811   }
812   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
813 
814   /* store the column indices to the file */
815   if (!rank) {
816     MPI_Status status;
817     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
818     for (i=1; i<size; i++) {
819       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
820       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
821       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
822       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
823     }
824   } else {
825     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
826     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
827   }
828   ierr = PetscFree(column_indices);CHKERRQ(ierr);
829 
830   /* load up the local column values */
831   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
832   cnt  = 0;
833   for (i=0; i<mat->rmap.n; i++) {
834     for (j=B->i[i]; j<B->i[i+1]; j++) {
835       if ( garray[B->j[j]] > cstart) break;
836       column_values[cnt++] = B->a[j];
837     }
838     for (k=A->i[i]; k<A->i[i+1]; k++) {
839       column_values[cnt++] = A->a[k];
840     }
841     for (; j<B->i[i+1]; j++) {
842       column_values[cnt++] = B->a[j];
843     }
844   }
845   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
846 
847   /* store the column values to the file */
848   if (!rank) {
849     MPI_Status status;
850     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
851     for (i=1; i<size; i++) {
852       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
853       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
854       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
855       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
856     }
857   } else {
858     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
859     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
860   }
861   ierr = PetscFree(column_values);CHKERRQ(ierr);
862   PetscFunctionReturn(0);
863 }
864 
865 #undef __FUNCT__
866 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
867 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
868 {
869   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
870   PetscErrorCode    ierr;
871   PetscMPIInt       rank = aij->rank,size = aij->size;
872   PetscTruth        isdraw,iascii,isbinary;
873   PetscViewer       sviewer;
874   PetscViewerFormat format;
875 
876   PetscFunctionBegin;
877   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
878   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
879   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
880   if (iascii) {
881     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
882     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
883       MatInfo    info;
884       PetscTruth inodes;
885 
886       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
887       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
888       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
889       if (!inodes) {
890         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
891 					      rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
892       } else {
893         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
894 		    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
895       }
896       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
897       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
898       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
899       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
900       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
901       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
902       PetscFunctionReturn(0);
903     } else if (format == PETSC_VIEWER_ASCII_INFO) {
904       PetscInt   inodecount,inodelimit,*inodes;
905       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
906       if (inodes) {
907         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
908       } else {
909         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
910       }
911       PetscFunctionReturn(0);
912     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
913       PetscFunctionReturn(0);
914     }
915   } else if (isbinary) {
916     if (size == 1) {
917       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
918       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
919     } else {
920       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
921     }
922     PetscFunctionReturn(0);
923   } else if (isdraw) {
924     PetscDraw  draw;
925     PetscTruth isnull;
926     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
927     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
928   }
929 
930   if (size == 1) {
931     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
932     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
933   } else {
934     /* assemble the entire matrix onto first processor. */
935     Mat         A;
936     Mat_SeqAIJ  *Aloc;
937     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
938     PetscScalar *a;
939 
940     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
941     if (!rank) {
942       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
943     } else {
944       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
945     }
946     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
947     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
948     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
949     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
950 
951     /* copy over the A part */
952     Aloc = (Mat_SeqAIJ*)aij->A->data;
953     m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
954     row = mat->rmap.rstart;
955     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
956     for (i=0; i<m; i++) {
957       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
958       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
959     }
960     aj = Aloc->j;
961     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}
962 
963     /* copy over the B part */
964     Aloc = (Mat_SeqAIJ*)aij->B->data;
965     m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
966     row  = mat->rmap.rstart;
967     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
968     ct   = cols;
969     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
970     for (i=0; i<m; i++) {
971       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
972       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
973     }
974     ierr = PetscFree(ct);CHKERRQ(ierr);
975     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
976     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977     /*
978        Everyone has to call to draw the matrix since the graphics waits are
979        synchronized across all processors that share the PetscDraw object
980     */
981     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
982     if (!rank) {
983       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
984       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
985     }
986     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
987     ierr = MatDestroy(A);CHKERRQ(ierr);
988   }
989   PetscFunctionReturn(0);
990 }
991 
992 #undef __FUNCT__
993 #define __FUNCT__ "MatView_MPIAIJ"
994 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
995 {
996   PetscErrorCode ierr;
997   PetscTruth     iascii,isdraw,issocket,isbinary;
998 
999   PetscFunctionBegin;
1000   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1001   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1002   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1003   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1004   if (iascii || isdraw || isbinary || issocket) {
1005     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1006   } else {
1007     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1008   }
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 
1013 
1014 #undef __FUNCT__
1015 #define __FUNCT__ "MatRelax_MPIAIJ"
1016 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1017 {
1018   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1019   PetscErrorCode ierr;
1020   Vec            bb1;
1021 
1022   PetscFunctionBegin;
1023   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1024 
1025   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1026 
1027   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1028     if (flag & SOR_ZERO_INITIAL_GUESS) {
1029       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1030       its--;
1031     }
1032 
1033     while (its--) {
1034       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1035       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1036 
1037       /* update rhs: bb1 = bb - B*x */
1038       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1039       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1040 
1041       /* local sweep */
1042       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1043       CHKERRQ(ierr);
1044     }
1045   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1046     if (flag & SOR_ZERO_INITIAL_GUESS) {
1047       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1048       its--;
1049     }
1050     while (its--) {
1051       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1052       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1053 
1054       /* update rhs: bb1 = bb - B*x */
1055       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1056       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1057 
1058       /* local sweep */
1059       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1060       CHKERRQ(ierr);
1061     }
1062   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1063     if (flag & SOR_ZERO_INITIAL_GUESS) {
1064       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1065       its--;
1066     }
1067     while (its--) {
1068       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1069       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1070 
1071       /* update rhs: bb1 = bb - B*x */
1072       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1073       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1074 
1075       /* local sweep */
1076       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1077       CHKERRQ(ierr);
1078     }
1079   } else {
1080     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1081   }
1082 
1083   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatPermute_MPIAIJ"
1089 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1090 {
1091   MPI_Comm       comm,pcomm;
1092   PetscInt       first,local_size,nrows,*rows;
1093   int            ntids;
1094   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1095   PetscErrorCode ierr;
1096 
1097   PetscFunctionBegin;
1098   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1099   /* make a collective version of 'rowp' */
1100   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1101   if (pcomm==comm) {
1102     crowp = rowp;
1103   } else {
1104     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1105     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1106     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1107     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1108   }
1109   /* collect the global row permutation and invert it */
1110   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1111   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1112   if (pcomm!=comm) {
1113     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1114   }
1115   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1116   /* get the local target indices */
1117   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1118   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1119   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1120   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1121   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1122   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1123   /* the column permutation is so much easier;
1124      make a local version of 'colp' and invert it */
1125   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1126   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1127   if (ntids==1) {
1128     lcolp = colp;
1129   } else {
1130     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1131     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1132     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1133   }
1134   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1135   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1136   if (ntids>1) {
1137     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1138     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1139   }
1140   /* now we just get the submatrix */
1141   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1142   /* clean up */
1143   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1144   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 #undef __FUNCT__
1149 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1150 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1151 {
1152   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1153   Mat            A = mat->A,B = mat->B;
1154   PetscErrorCode ierr;
1155   PetscReal      isend[5],irecv[5];
1156 
1157   PetscFunctionBegin;
1158   info->block_size     = 1.0;
1159   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1160   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1161   isend[3] = info->memory;  isend[4] = info->mallocs;
1162   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1163   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1164   isend[3] += info->memory;  isend[4] += info->mallocs;
1165   if (flag == MAT_LOCAL) {
1166     info->nz_used      = isend[0];
1167     info->nz_allocated = isend[1];
1168     info->nz_unneeded  = isend[2];
1169     info->memory       = isend[3];
1170     info->mallocs      = isend[4];
1171   } else if (flag == MAT_GLOBAL_MAX) {
1172     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1173     info->nz_used      = irecv[0];
1174     info->nz_allocated = irecv[1];
1175     info->nz_unneeded  = irecv[2];
1176     info->memory       = irecv[3];
1177     info->mallocs      = irecv[4];
1178   } else if (flag == MAT_GLOBAL_SUM) {
1179     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1180     info->nz_used      = irecv[0];
1181     info->nz_allocated = irecv[1];
1182     info->nz_unneeded  = irecv[2];
1183     info->memory       = irecv[3];
1184     info->mallocs      = irecv[4];
1185   }
1186   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1187   info->fill_ratio_needed = 0;
1188   info->factor_mallocs    = 0;
1189   info->rows_global       = (double)matin->rmap.N;
1190   info->columns_global    = (double)matin->cmap.N;
1191   info->rows_local        = (double)matin->rmap.n;
1192   info->columns_local     = (double)matin->cmap.N;
1193 
1194   PetscFunctionReturn(0);
1195 }
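
/*
   Illustrative sketch (guarded out of compilation): collecting the information filled in
   above, summed over all processes; MAT_LOCAL and MAT_GLOBAL_MAX work the same way.
   C is an assembled MPIAIJ matrix.
*/
#if 0
  MatInfo        info;
  PetscErrorCode ierr;

  ierr = MatGetInfo(C,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  /* e.g. info.nz_used, info.nz_allocated and info.mallocs now hold global totals */
#endif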
1196 
1197 #undef __FUNCT__
1198 #define __FUNCT__ "MatSetOption_MPIAIJ"
1199 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1200 {
1201   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1202   PetscErrorCode ierr;
1203 
1204   PetscFunctionBegin;
1205   switch (op) {
1206   case MAT_NO_NEW_NONZERO_LOCATIONS:
1207   case MAT_YES_NEW_NONZERO_LOCATIONS:
1208   case MAT_COLUMNS_UNSORTED:
1209   case MAT_COLUMNS_SORTED:
1210   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1211   case MAT_KEEP_ZEROED_ROWS:
1212   case MAT_NEW_NONZERO_LOCATION_ERR:
1213   case MAT_USE_INODES:
1214   case MAT_DO_NOT_USE_INODES:
1215   case MAT_IGNORE_ZERO_ENTRIES:
1216     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1217     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1218     break;
1219   case MAT_ROW_ORIENTED:
1220     a->roworiented = PETSC_TRUE;
1221     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1222     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1223     break;
1224   case MAT_ROWS_SORTED:
1225   case MAT_ROWS_UNSORTED:
1226   case MAT_YES_NEW_DIAGONALS:
1227     ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr);
1228     break;
1229   case MAT_COLUMN_ORIENTED:
1230     a->roworiented = PETSC_FALSE;
1231     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1232     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1233     break;
1234   case MAT_IGNORE_OFF_PROC_ENTRIES:
1235     a->donotstash = PETSC_TRUE;
1236     break;
1237   case MAT_NO_NEW_DIAGONALS:
1238     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1239   case MAT_SYMMETRIC:
1240     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1241     break;
1242   case MAT_STRUCTURALLY_SYMMETRIC:
1243   case MAT_HERMITIAN:
1244   case MAT_SYMMETRY_ETERNAL:
1245     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1246     break;
1247   case MAT_NOT_SYMMETRIC:
1248   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1249   case MAT_NOT_HERMITIAN:
1250   case MAT_NOT_SYMMETRY_ETERNAL:
1251     break;
1252   default:
1253     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1254   }
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatGetRow_MPIAIJ"
1260 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1261 {
1262   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1263   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1264   PetscErrorCode ierr;
1265   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
1266   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
1267   PetscInt       *cmap,*idx_p;
1268 
1269   PetscFunctionBegin;
1270   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1271   mat->getrowactive = PETSC_TRUE;
1272 
1273   if (!mat->rowvalues && (idx || v)) {
1274     /*
1275         allocate enough space to hold information from the longest row.
1276     */
1277     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1278     PetscInt     max = 1,tmp;
1279     for (i=0; i<matin->rmap.n; i++) {
1280       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1281       if (max < tmp) { max = tmp; }
1282     }
1283     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1284     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1285   }
1286 
1287   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1288   lrow = row - rstart;
1289 
1290   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1291   if (!v)   {pvA = 0; pvB = 0;}
1292   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1293   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1294   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1295   nztot = nzA + nzB;
1296 
1297   cmap  = mat->garray;
1298   if (v  || idx) {
1299     if (nztot) {
1300       /* Sort by increasing column numbers, assuming A and B already sorted */
1301       PetscInt imark = -1;
1302       if (v) {
1303         *v = v_p = mat->rowvalues;
1304         for (i=0; i<nzB; i++) {
1305           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1306           else break;
1307         }
1308         imark = i;
1309         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1310         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1311       }
1312       if (idx) {
1313         *idx = idx_p = mat->rowindices;
1314         if (imark > -1) {
1315           for (i=0; i<imark; i++) {
1316             idx_p[i] = cmap[cworkB[i]];
1317           }
1318         } else {
1319           for (i=0; i<nzB; i++) {
1320             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1321             else break;
1322           }
1323           imark = i;
1324         }
1325         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1326         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1327       }
1328     } else {
1329       if (idx) *idx = 0;
1330       if (v)   *v   = 0;
1331     }
1332   }
1333   *nz = nztot;
1334   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1335   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1336   PetscFunctionReturn(0);
1337 }
1338 
1339 #undef __FUNCT__
1340 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1341 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1342 {
1343   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1344 
1345   PetscFunctionBegin;
1346   if (!aij->getrowactive) {
1347     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1348   }
1349   aij->getrowactive = PETSC_FALSE;
1350   PetscFunctionReturn(0);
1351 }
1352 
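/*
   Summary of the norms computed below:
     NORM_FROBENIUS : sqrt(sum_ij |a_ij|^2); the local sums of squares are reduced with MPI_SUM
     NORM_1         : max_j sum_i |a_ij| (largest column sum); the per-column sums are reduced with MPI_SUM
     NORM_INFINITY  : max_i sum_j |a_ij| (largest row sum); the local row maxima are reduced with MPI_MAX
*/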
1353 #undef __FUNCT__
1354 #define __FUNCT__ "MatNorm_MPIAIJ"
1355 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1356 {
1357   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1358   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1359   PetscErrorCode ierr;
1360   PetscInt       i,j,cstart = mat->cmap.rstart;
1361   PetscReal      sum = 0.0;
1362   PetscScalar    *v;
1363 
1364   PetscFunctionBegin;
1365   if (aij->size == 1) {
1366     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1367   } else {
1368     if (type == NORM_FROBENIUS) {
1369       v = amat->a;
1370       for (i=0; i<amat->nz; i++) {
1371 #if defined(PETSC_USE_COMPLEX)
1372         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1373 #else
1374         sum += (*v)*(*v); v++;
1375 #endif
1376       }
1377       v = bmat->a;
1378       for (i=0; i<bmat->nz; i++) {
1379 #if defined(PETSC_USE_COMPLEX)
1380         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1381 #else
1382         sum += (*v)*(*v); v++;
1383 #endif
1384       }
1385       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1386       *norm = sqrt(*norm);
1387     } else if (type == NORM_1) { /* max column norm */
1388       PetscReal *tmp,*tmp2;
1389       PetscInt    *jj,*garray = aij->garray;
1390       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1391       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1392       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
1393       *norm = 0.0;
1394       v = amat->a; jj = amat->j;
1395       for (j=0; j<amat->nz; j++) {
1396         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1397       }
1398       v = bmat->a; jj = bmat->j;
1399       for (j=0; j<bmat->nz; j++) {
1400         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1401       }
1402       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1403       for (j=0; j<mat->cmap.N; j++) {
1404         if (tmp2[j] > *norm) *norm = tmp2[j];
1405       }
1406       ierr = PetscFree(tmp);CHKERRQ(ierr);
1407       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1408     } else if (type == NORM_INFINITY) { /* max row norm */
1409       PetscReal ntemp = 0.0;
1410       for (j=0; j<aij->A->rmap.n; j++) {
1411         v = amat->a + amat->i[j];
1412         sum = 0.0;
1413         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1414           sum += PetscAbsScalar(*v); v++;
1415         }
1416         v = bmat->a + bmat->i[j];
1417         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1418           sum += PetscAbsScalar(*v); v++;
1419         }
1420         if (sum > ntemp) ntemp = sum;
1421       }
1422       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1423     } else {
1424       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1425     }
1426   }
1427   PetscFunctionReturn(0);
1428 }
1429 
1430 #undef __FUNCT__
1431 #define __FUNCT__ "MatTranspose_MPIAIJ"
1432 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1433 {
1434   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1435   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1436   PetscErrorCode ierr;
1437   PetscInt       M = A->rmap.N,N = A->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
1438   Mat            B;
1439   PetscScalar    *array;
1440 
1441   PetscFunctionBegin;
1442   if (!matout && M != N) {
1443     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1444   }
1445 
1446   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1447   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1448   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1449   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1450 
1451   /* copy over the A part */
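  /* the local column indices of the diagonal block are temporarily shifted to global indices and shifted back after the MatSetValues() loop below */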
1452   Aloc = (Mat_SeqAIJ*)a->A->data;
1453   m = a->A->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1454   row = A->rmap.rstart;
1455   for (i=0; i<ai[m]; i++) {aj[i] += A->cmap.rstart ;}
1456   for (i=0; i<m; i++) {
1457     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1458     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1459   }
1460   aj = Aloc->j;
1461   for (i=0; i<ai[m]; i++) {aj[i] -= A->cmap.rstart ;}
1462 
1463   /* copy over the B part */
1464   Aloc = (Mat_SeqAIJ*)a->B->data;
1465   m = a->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1466   row  = A->rmap.rstart;
1467   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1468   ct   = cols;
1469   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1470   for (i=0; i<m; i++) {
1471     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1472     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1473   }
1474   ierr = PetscFree(ct);CHKERRQ(ierr);
1475   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1476   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1477   if (matout) {
1478     *matout = B;
1479   } else {
1480     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1481   }
1482   PetscFunctionReturn(0);
1483 }
1484 
1485 #undef __FUNCT__
1486 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1487 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1488 {
1489   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1490   Mat            a = aij->A,b = aij->B;
1491   PetscErrorCode ierr;
1492   PetscInt       s1,s2,s3;
1493 
1494   PetscFunctionBegin;
1495   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1496   if (rr) {
1497     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1498     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1499     /* Overlap communication with computation. */
1500     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1501   }
1502   if (ll) {
1503     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1504     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1505     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1506   }
1507   /* scale the diagonal block */
1508   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1509 
1510   if (rr) {
1511     /* Do a scatter end and then right scale the off-diagonal block */
1512     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1513     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1514   }
1515 
1516   PetscFunctionReturn(0);
1517 }
1518 
1519 #undef __FUNCT__
1520 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1521 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1522 {
1523   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1524   PetscErrorCode ierr;
1525 
1526   PetscFunctionBegin;
1527   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1528   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1529   PetscFunctionReturn(0);
1530 }
1531 #undef __FUNCT__
1532 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1533 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1534 {
1535   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1536   PetscErrorCode ierr;
1537 
1538   PetscFunctionBegin;
1539   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1540   PetscFunctionReturn(0);
1541 }
1542 
1543 #undef __FUNCT__
1544 #define __FUNCT__ "MatEqual_MPIAIJ"
1545 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1546 {
1547   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1548   Mat            a,b,c,d;
1549   PetscTruth     flg;
1550   PetscErrorCode ierr;
1551 
1552   PetscFunctionBegin;
1553   a = matA->A; b = matA->B;
1554   c = matB->A; d = matB->B;
1555 
1556   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1557   if (flg) {
1558     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1559   }
1560   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1561   PetscFunctionReturn(0);
1562 }
1563 
1564 #undef __FUNCT__
1565 #define __FUNCT__ "MatCopy_MPIAIJ"
1566 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1567 {
1568   PetscErrorCode ierr;
1569   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1570   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1571 
1572   PetscFunctionBegin;
1573   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1574   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1575     /* Because of the column compression in the off-processor part of the matrix a->B,
1576        the number of columns in a->B and b->B may differ, hence we cannot call MatCopy()
1577        directly on the two parts. If need be, a more efficient copy than MatCopy_Basic()
1578        could be provided by first uncompressing the a->B matrices and then copying the
1579        submatrices */
1580     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1581   } else {
1582     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1583     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1584   }
1585   PetscFunctionReturn(0);
1586 }
1587 
1588 #undef __FUNCT__
1589 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1590 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1591 {
1592   PetscErrorCode ierr;
1593 
1594   PetscFunctionBegin;
1595   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1596   PetscFunctionReturn(0);
1597 }
1598 
1599 #include "petscblaslapack.h"
1600 #undef __FUNCT__
1601 #define __FUNCT__ "MatAXPY_MPIAIJ"
1602 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1603 {
1604   PetscErrorCode ierr;
1605   PetscInt       i;
1606   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1607   PetscBLASInt   bnz,one=1;
1608   Mat_SeqAIJ     *x,*y;
1609 
1610   PetscFunctionBegin;
1611   if (str == SAME_NONZERO_PATTERN) {
1612     PetscScalar alpha = a;
1613     x = (Mat_SeqAIJ *)xx->A->data;
1614     y = (Mat_SeqAIJ *)yy->A->data;
1615     bnz = (PetscBLASInt)x->nz;
1616     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1617     x = (Mat_SeqAIJ *)xx->B->data;
1618     y = (Mat_SeqAIJ *)yy->B->data;
1619     bnz = (PetscBLASInt)x->nz;
1620     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1621   } else if (str == SUBSET_NONZERO_PATTERN) {
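    /* X's nonzero pattern is a subset of Y's: the diagonal blocks are handled by MatAXPY_SeqAIJ();
       for the off-diagonal blocks an xtoy map from X's entries into Y's value array is built
       (or reused) and then used to accumulate a*X into Y */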
1622     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1623 
1624     x = (Mat_SeqAIJ *)xx->B->data;
1625     y = (Mat_SeqAIJ *)yy->B->data;
1626     if (y->xtoy && y->XtoY != xx->B) {
1627       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1628       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1629     }
1630     if (!y->xtoy) { /* get xtoy */
1631       ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1632       y->XtoY = xx->B;
1633     }
1634     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1635   } else {
1636     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1637   }
1638   PetscFunctionReturn(0);
1639 }
1640 
1641 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1642 
1643 #undef __FUNCT__
1644 #define __FUNCT__ "MatConjugate_MPIAIJ"
1645 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1646 {
1647 #if defined(PETSC_USE_COMPLEX)
1648   PetscErrorCode ierr;
1649   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1650 
1651   PetscFunctionBegin;
1652   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1653   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1654 #else
1655   PetscFunctionBegin;
1656 #endif
1657   PetscFunctionReturn(0);
1658 }
1659 
1660 #undef __FUNCT__
1661 #define __FUNCT__ "MatRealPart_MPIAIJ"
1662 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1663 {
1664   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1665   PetscErrorCode ierr;
1666 
1667   PetscFunctionBegin;
1668   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1669   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1670   PetscFunctionReturn(0);
1671 }
1672 
1673 #undef __FUNCT__
1674 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1675 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1676 {
1677   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1678   PetscErrorCode ierr;
1679 
1680   PetscFunctionBegin;
1681   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1682   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1683   PetscFunctionReturn(0);
1684 }
1685 
1686 /* -------------------------------------------------------------------*/
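/* The operations table for MPIAIJ matrices; a zero entry means the corresponding operation is not provided by this type */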
1687 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1688        MatGetRow_MPIAIJ,
1689        MatRestoreRow_MPIAIJ,
1690        MatMult_MPIAIJ,
1691 /* 4*/ MatMultAdd_MPIAIJ,
1692        MatMultTranspose_MPIAIJ,
1693        MatMultTransposeAdd_MPIAIJ,
1694        0,
1695        0,
1696        0,
1697 /*10*/ 0,
1698        0,
1699        0,
1700        MatRelax_MPIAIJ,
1701        MatTranspose_MPIAIJ,
1702 /*15*/ MatGetInfo_MPIAIJ,
1703        MatEqual_MPIAIJ,
1704        MatGetDiagonal_MPIAIJ,
1705        MatDiagonalScale_MPIAIJ,
1706        MatNorm_MPIAIJ,
1707 /*20*/ MatAssemblyBegin_MPIAIJ,
1708        MatAssemblyEnd_MPIAIJ,
1709        0,
1710        MatSetOption_MPIAIJ,
1711        MatZeroEntries_MPIAIJ,
1712 /*25*/ MatZeroRows_MPIAIJ,
1713        0,
1714        0,
1715        0,
1716        0,
1717 /*30*/ MatSetUpPreallocation_MPIAIJ,
1718        0,
1719        0,
1720        0,
1721        0,
1722 /*35*/ MatDuplicate_MPIAIJ,
1723        0,
1724        0,
1725        0,
1726        0,
1727 /*40*/ MatAXPY_MPIAIJ,
1728        MatGetSubMatrices_MPIAIJ,
1729        MatIncreaseOverlap_MPIAIJ,
1730        MatGetValues_MPIAIJ,
1731        MatCopy_MPIAIJ,
1732 /*45*/ 0,
1733        MatScale_MPIAIJ,
1734        0,
1735        0,
1736        0,
1737 /*50*/ MatSetBlockSize_MPIAIJ,
1738        0,
1739        0,
1740        0,
1741        0,
1742 /*55*/ MatFDColoringCreate_MPIAIJ,
1743        0,
1744        MatSetUnfactored_MPIAIJ,
1745        MatPermute_MPIAIJ,
1746        0,
1747 /*60*/ MatGetSubMatrix_MPIAIJ,
1748        MatDestroy_MPIAIJ,
1749        MatView_MPIAIJ,
1750        0,
1751        0,
1752 /*65*/ 0,
1753        0,
1754        0,
1755        0,
1756        0,
1757 /*70*/ 0,
1758        0,
1759        MatSetColoring_MPIAIJ,
1760 #if defined(PETSC_HAVE_ADIC)
1761        MatSetValuesAdic_MPIAIJ,
1762 #else
1763        0,
1764 #endif
1765        MatSetValuesAdifor_MPIAIJ,
1766 /*75*/ 0,
1767        0,
1768        0,
1769        0,
1770        0,
1771 /*80*/ 0,
1772        0,
1773        0,
1774        0,
1775 /*84*/ MatLoad_MPIAIJ,
1776        0,
1777        0,
1778        0,
1779        0,
1780        0,
1781 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1782        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1783        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1784        MatPtAP_Basic,
1785        MatPtAPSymbolic_MPIAIJ,
1786 /*95*/ MatPtAPNumeric_MPIAIJ,
1787        0,
1788        0,
1789        0,
1790        0,
1791 /*100*/0,
1792        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1793        MatPtAPNumeric_MPIAIJ_MPIAIJ,
1794        MatConjugate_MPIAIJ,
1795        0,
1796 /*105*/MatSetValuesRow_MPIAIJ,
1797        MatRealPart_MPIAIJ,
1798        MatImaginaryPart_MPIAIJ};
1799 
1800 /* ----------------------------------------------------------------------------------------*/
1801 
1802 EXTERN_C_BEGIN
1803 #undef __FUNCT__
1804 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1805 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
1806 {
1807   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1808   PetscErrorCode ierr;
1809 
1810   PetscFunctionBegin;
1811   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1812   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1813   PetscFunctionReturn(0);
1814 }
1815 EXTERN_C_END
1816 
1817 EXTERN_C_BEGIN
1818 #undef __FUNCT__
1819 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1820 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
1821 {
1822   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1823   PetscErrorCode ierr;
1824 
1825   PetscFunctionBegin;
1826   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1827   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1828   PetscFunctionReturn(0);
1829 }
1830 EXTERN_C_END
1831 
1832 #include "petscpc.h"
1833 EXTERN_C_BEGIN
1834 #undef __FUNCT__
1835 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1836 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1837 {
1838   Mat_MPIAIJ     *b;
1839   PetscErrorCode ierr;
1840   PetscInt       i;
1841 
1842   PetscFunctionBegin;
1843   B->preallocated = PETSC_TRUE;
1844   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1845   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1846   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1847   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1848 
1849   B->rmap.bs = B->cmap.bs = 1;
1850   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
1851   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
1852   if (d_nnz) {
1853     for (i=0; i<B->rmap.n; i++) {
1854       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1855     }
1856   }
1857   if (o_nnz) {
1858     for (i=0; i<B->rmap.n; i++) {
1859       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1860     }
1861   }
1862   b = (Mat_MPIAIJ*)B->data;
1863 
1864   /* Explicitly create 2 MATSEQAIJ matrices. */
1865   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1866   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
1867   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
1868   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1869   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1870   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
1871   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
1872   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1873 
1874   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1875   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1876 
1877   PetscFunctionReturn(0);
1878 }
1879 EXTERN_C_END
1880 
1881 #undef __FUNCT__
1882 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1883 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1884 {
1885   Mat            mat;
1886   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1887   PetscErrorCode ierr;
1888 
1889   PetscFunctionBegin;
1890   *newmat       = 0;
1891   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
1892   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
1893   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1894   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1895   a    = (Mat_MPIAIJ*)mat->data;
1896 
1897   mat->factor       = matin->factor;
1898   mat->rmap.bs      = matin->rmap.bs;
1899   mat->assembled    = PETSC_TRUE;
1900   mat->insertmode   = NOT_SET_VALUES;
1901   mat->preallocated = PETSC_TRUE;
1902 
1903   a->size           = oldmat->size;
1904   a->rank           = oldmat->rank;
1905   a->donotstash     = oldmat->donotstash;
1906   a->roworiented    = oldmat->roworiented;
1907   a->rowindices     = 0;
1908   a->rowvalues      = 0;
1909   a->getrowactive   = PETSC_FALSE;
1910 
1911   ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
1912   ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
1913 
1914   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1915   if (oldmat->colmap) {
1916 #if defined (PETSC_USE_CTABLE)
1917     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1918 #else
1919     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1920     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
1921     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
1922 #endif
1923   } else a->colmap = 0;
1924   if (oldmat->garray) {
1925     PetscInt len;
1926     len  = oldmat->B->cmap.n;
1927     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1928     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
1929     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1930   } else a->garray = 0;
1931 
1932   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1933   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
1934   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1935   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
1936   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1937   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1938   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1939   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
1940   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1941   *newmat = mat;
1942   PetscFunctionReturn(0);
1943 }
1944 
1945 #include "petscsys.h"
1946 
1947 #undef __FUNCT__
1948 #define __FUNCT__ "MatLoad_MPIAIJ"
1949 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
1950 {
1951   Mat            A;
1952   PetscScalar    *vals,*svals;
1953   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1954   MPI_Status     status;
1955   PetscErrorCode ierr;
1956   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1957   PetscInt       i,nz,j,rstart,rend,mmax;
1958   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1959   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
1960   PetscInt       cend,cstart,n,*rowners;
1961   int            fd;
1962 
1963   PetscFunctionBegin;
1964   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1965   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1966   if (!rank) {
1967     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1968     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1969     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1970   }
1971 
1972   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1973   M = header[1]; N = header[2];
1974   /* determine ownership of all rows */
1975   m    = M/size + ((M % size) > rank);
1976   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1977   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1978 
1979   /* First process needs enough room for the process with the most rows */
1980   if (!rank) {
1981     mmax       = rowners[1];
1982     for (i=2; i<size; i++) {
1983       mmax = PetscMax(mmax,rowners[i]);
1984     }
1985   } else mmax = m;
1986 
1987   rowners[0] = 0;
1988   for (i=2; i<=size; i++) {
1989     mmax       = PetscMax(mmax,rowners[i]);
1990     rowners[i] += rowners[i-1];
1991   }
1992   rstart = rowners[rank];
1993   rend   = rowners[rank+1];
1994 
1995   /* distribute row lengths to all processors */
1996   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
1997   if (!rank) {
1998     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
1999     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2000     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2001     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2002     for (j=0; j<m; j++) {
2003       procsnz[0] += ourlens[j];
2004     }
2005     for (i=1; i<size; i++) {
2006       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2007       /* calculate the number of nonzeros on each processor */
2008       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2009         procsnz[i] += rowlengths[j];
2010       }
2011       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2012     }
2013     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2014   } else {
2015     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2016   }
2017 
2018   if (!rank) {
2019     /* determine max buffer needed and allocate it */
2020     maxnz = 0;
2021     for (i=0; i<size; i++) {
2022       maxnz = PetscMax(maxnz,procsnz[i]);
2023     }
2024     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2025 
2026     /* read in my part of the matrix column indices  */
2027     nz   = procsnz[0];
2028     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2029     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2030 
2031     /* read in everyone else's and ship them off */
2032     for (i=1; i<size; i++) {
2033       nz   = procsnz[i];
2034       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2035       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2036     }
2037     ierr = PetscFree(cols);CHKERRQ(ierr);
2038   } else {
2039     /* determine buffer space needed for message */
2040     nz = 0;
2041     for (i=0; i<m; i++) {
2042       nz += ourlens[i];
2043     }
2044     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2045 
2046     /* receive message of column indices */
2047     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2048     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2049     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2050   }
2051 
2052   /* determine column ownership if matrix is not square */
2053   if (N != M) {
2054     n      = N/size + ((N % size) > rank);
2055     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2056     cstart = cend - n;
2057   } else {
2058     cstart = rstart;
2059     cend   = rend;
2060     n      = cend - cstart;
2061   }
2062 
2063   /* loop over local rows, determining the number of off-diagonal entries */
2064   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2065   jj = 0;
2066   for (i=0; i<m; i++) {
2067     for (j=0; j<ourlens[i]; j++) {
2068       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2069       jj++;
2070     }
2071   }
2072 
2073   /* create our matrix */
2074   for (i=0; i<m; i++) {
2075     ourlens[i] -= offlens[i];
2076   }
2077   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2078   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2079   ierr = MatSetType(A,type);CHKERRQ(ierr);
2080   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2081 
2082   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2083   for (i=0; i<m; i++) {
2084     ourlens[i] += offlens[i];
2085   }
2086 
2087   if (!rank) {
2088     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2089 
2090     /* read in my part of the matrix numerical values  */
2091     nz   = procsnz[0];
2092     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2093 
2094     /* insert into matrix */
2095     jj      = rstart;
2096     smycols = mycols;
2097     svals   = vals;
2098     for (i=0; i<m; i++) {
2099       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2100       smycols += ourlens[i];
2101       svals   += ourlens[i];
2102       jj++;
2103     }
2104 
2105     /* read in other processors and ship out */
2106     for (i=1; i<size; i++) {
2107       nz   = procsnz[i];
2108       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2109       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2110     }
2111     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2112   } else {
2113     /* receive numeric values */
2114     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2115 
2116     /* receive message of values */
2117     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2118     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2119     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2120 
2121     /* insert into matrix */
2122     jj      = rstart;
2123     smycols = mycols;
2124     svals   = vals;
2125     for (i=0; i<m; i++) {
2126       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2127       smycols += ourlens[i];
2128       svals   += ourlens[i];
2129       jj++;
2130     }
2131   }
2132   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2133   ierr = PetscFree(vals);CHKERRQ(ierr);
2134   ierr = PetscFree(mycols);CHKERRQ(ierr);
2135   ierr = PetscFree(rowners);CHKERRQ(ierr);
2136 
2137   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2138   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2139   *newmat = A;
2140   PetscFunctionReturn(0);
2141 }
2142 
2143 #undef __FUNCT__
2144 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2145 /*
2146     Not ideal since it makes two copies of the submatrix: first a sequential SeqAIJ matrix
2147   on each process, and then the final result obtained by concatenating the local matrices.
2148   Writing it directly would look much like MatGetSubMatrices_MPIAIJ()
2149 */
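/*
    Typical call pattern (a sketch): the first call uses MAT_INITIAL_MATRIX; later calls may pass
    MAT_REUSE_MATRIX to reuse the sequential submatrix that is composed on the result under the
    name "SubMatrix".
*/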
2150 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2151 {
2152   PetscErrorCode ierr;
2153   PetscMPIInt    rank,size;
2154   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2155   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2156   Mat            *local,M,Mreuse;
2157   PetscScalar    *vwork,*aa;
2158   MPI_Comm       comm = mat->comm;
2159   Mat_SeqAIJ     *aij;
2160 
2161 
2162   PetscFunctionBegin;
2163   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2164   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2165 
2166   if (call ==  MAT_REUSE_MATRIX) {
2167     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2168     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2169     local = &Mreuse;
2170     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2171   } else {
2172     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2173     Mreuse = *local;
2174     ierr   = PetscFree(local);CHKERRQ(ierr);
2175   }
2176 
2177   /*
2178       m - number of local rows
2179       n - number of columns (same on all processors)
2180       rstart - first row in new global matrix generated
2181   */
2182   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2183   if (call == MAT_INITIAL_MATRIX) {
2184     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2185     ii  = aij->i;
2186     jj  = aij->j;
2187 
2188     /*
2189         Determine the number of non-zeros in the diagonal and off-diagonal
2190         portions of the matrix in order to do correct preallocation
2191     */
2192 
2193     /* first get start and end of "diagonal" columns */
2194     if (csize == PETSC_DECIDE) {
2195       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2196       if (mglobal == n) { /* square matrix */
2197         nlocal = m;
2198       } else {
2199         nlocal = n/size + ((n % size) > rank);
2200       }
2201     } else {
2202       nlocal = csize;
2203     }
2204     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2205     rstart = rend - nlocal;
2206     if (rank == size - 1 && rend != n) {
2207       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2208     }
2209 
2210     /* next, compute all the lengths */
2211     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2212     olens = dlens + m;
2213     for (i=0; i<m; i++) {
2214       jend = ii[i+1] - ii[i];
2215       olen = 0;
2216       dlen = 0;
2217       for (j=0; j<jend; j++) {
2218         if (*jj < rstart || *jj >= rend) olen++;
2219         else dlen++;
2220         jj++;
2221       }
2222       olens[i] = olen;
2223       dlens[i] = dlen;
2224     }
2225     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2226     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2227     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2228     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2229     ierr = PetscFree(dlens);CHKERRQ(ierr);
2230   } else {
2231     PetscInt ml,nl;
2232 
2233     M = *newmat;
2234     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2235     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2236     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2237     /*
2238          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2239        rather than the slower MatSetValues().
2240     */
2241     M->was_assembled = PETSC_TRUE;
2242     M->assembled     = PETSC_FALSE;
2243   }
2244   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2245   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2246   ii  = aij->i;
2247   jj  = aij->j;
2248   aa  = aij->a;
2249   for (i=0; i<m; i++) {
2250     row   = rstart + i;
2251     nz    = ii[i+1] - ii[i];
2252     cwork = jj;     jj += nz;
2253     vwork = aa;     aa += nz;
2254     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2255   }
2256 
2257   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2258   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2259   *newmat = M;
2260 
2261   /* save the submatrix used on this process for the next request */
2262   if (call ==  MAT_INITIAL_MATRIX) {
2263     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2264     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2265   }
2266 
2267   PetscFunctionReturn(0);
2268 }
2269 
2270 EXTERN_C_BEGIN
2271 #undef __FUNCT__
2272 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2273 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2274 {
2275   PetscInt       m,cstart, cend,j,nnz,i,d;
2276   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
2277   const PetscInt *JJ;
2278   PetscScalar    *values;
2279   PetscErrorCode ierr;
2280 
2281   PetscFunctionBegin;
2282   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
2283 
2284   B->rmap.bs = B->cmap.bs = 1;
2285   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
2286   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
2287   m      = B->rmap.n;
2288   cstart = B->cmap.rstart;
2289   cend   = B->cmap.rend;
2290   rstart = B->rmap.rstart;
2291 
2292   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2293   o_nnz = d_nnz + m;
2294 
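  /* for each local row, count how many column indices fall in [cstart,cend): these belong to the
     diagonal block (d_nnz); the rest are off-diagonal (o_nnz). Column indices are assumed to be
     sorted within each row. */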
2295   for (i=0; i<m; i++) {
2296     nnz     = Ii[i+1]- Ii[i];
2297     JJ      = J + Ii[i];
2298     nnz_max = PetscMax(nnz_max,nnz);
2299     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
2300     for (j=0; j<nnz; j++) {
2301       if (*JJ >= cstart) break;
2302       JJ++;
2303     }
2304     d = 0;
2305     for (; j<nnz; j++) {
2306       if (*JJ++ >= cend) break;
2307       d++;
2308     }
2309     d_nnz[i] = d;
2310     o_nnz[i] = nnz - d;
2311   }
2312   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2313   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2314 
2315   if (v) values = (PetscScalar*)v;
2316   else {
2317     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2318     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2319   }
2320 
2321   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2322   for (i=0; i<m; i++) {
2323     ii   = i + rstart;
2324     nnz  = Ii[i+1]- Ii[i];
2325     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2326   }
2327   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2328   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2329   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2330 
2331   if (!v) {
2332     ierr = PetscFree(values);CHKERRQ(ierr);
2333   }
2334   PetscFunctionReturn(0);
2335 }
2336 EXTERN_C_END
2337 
2338 #undef __FUNCT__
2339 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2340 /*@
2341    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2342    (the default parallel PETSc format).
2343 
2344    Collective on MPI_Comm
2345 
2346    Input Parameters:
2347 +  B - the matrix
2348 .  i - the indices into j for the start of each local row (starts with zero)
2349 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2350 -  v - optional values in the matrix
2351 
2352    Level: developer
2353 
2354    Notes: this actually copies the values from i[], j[], and v[] to put them into PETSc's internal
2355      storage format. Thus changing the values in v[] after this call will not affect the matrix values.
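
   Example usage (a minimal sketch with made-up values; two local rows, columns sorted within each
   row, assuming B already has its type and sizes set):
.vb
      PetscInt    i[] = {0,2,3};
      PetscInt    j[] = {0,5,2};
      PetscScalar v[] = {1.0,2.0,3.0};
      ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve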
2356 
2357 .keywords: matrix, aij, compressed row, sparse, parallel
2358 
2359 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
2360           MatCreateSeqAIJWithArrays()
2361 @*/
2362 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2363 {
2364   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2365 
2366   PetscFunctionBegin;
2367   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2368   if (f) {
2369     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2370   }
2371   PetscFunctionReturn(0);
2372 }
2373 
2374 #undef __FUNCT__
2375 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2376 /*@C
2377    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2378    (the default parallel PETSc format).  For good matrix assembly performance
2379    the user should preallocate the matrix storage by setting the parameters
2380    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2381    performance can be increased by more than a factor of 50.
2382 
2383    Collective on MPI_Comm
2384 
2385    Input Parameters:
2386 +  A - the matrix
2387 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2388            (same value is used for all local rows)
2389 .  d_nnz - array containing the number of nonzeros in the various rows of the
2390            DIAGONAL portion of the local submatrix (possibly different for each row)
2391            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2392            The size of this array is equal to the number of local rows, i.e 'm'.
2393            You must leave room for the diagonal entry even if it is zero.
2394 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2395            submatrix (same value is used for all local rows).
2396 -  o_nnz - array containing the number of nonzeros in the various rows of the
2397            OFF-DIAGONAL portion of the local submatrix (possibly different for
2398            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2399            structure. The size of this array is equal to the number
2400            of local rows, i.e 'm'.
2401 
2402    If the *_nnz parameter is given then the *_nz parameter is ignored
2403 
2404    The AIJ format (also called the Yale sparse matrix format or
2405    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2406    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2407 
2408    The parallel matrix is partitioned such that the first m0 rows belong to
2409    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2410    to process 2 etc., where m0,m1,m2,... are the values of the input parameter 'm'.
2411 
2412    The DIAGONAL portion of the local submatrix of a processor can be defined
2413    as the submatrix which is obtained by extracting the part corresponding
2414    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2415    first row that belongs to the processor, and r2 is the last row belonging
2416    to this processor. This is a square mxm matrix. The remaining portion
2417    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2418 
2419    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2420 
2421    Example usage:
2422 
2423    Consider the following 8x8 matrix with 34 non-zero values, that is
2424    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2425    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2426    as follows:
2427 
2428 .vb
2429             1  2  0  |  0  3  0  |  0  4
2430     Proc0   0  5  6  |  7  0  0  |  8  0
2431             9  0 10  | 11  0  0  | 12  0
2432     -------------------------------------
2433            13  0 14  | 15 16 17  |  0  0
2434     Proc1   0 18  0  | 19 20 21  |  0  0
2435             0  0  0  | 22 23  0  | 24  0
2436     -------------------------------------
2437     Proc2  25 26 27  |  0  0 28  | 29  0
2438            30  0  0  | 31 32 33  |  0 34
2439 .ve
2440 
2441    This can be represented as a collection of submatrices as:
2442 
2443 .vb
2444       A B C
2445       D E F
2446       G H I
2447 .ve
2448 
2449    Where the submatrices A,B,C are owned by proc0, D,E,F are
2450    owned by proc1, G,H,I are owned by proc2.
2451 
2452    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2453    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2454    The 'M','N' parameters are 8,8, and have the same values on all procs.
2455 
2456    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2457    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2458    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2459    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2460    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
2461    matrix, and [DF] as another SeqAIJ matrix.
2462 
2463    When d_nz, o_nz parameters are specified, d_nz storage elements are
2464    allocated for every row of the local diagonal submatrix, and o_nz
2465    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2466    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2467    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2468    In this case, the values of d_nz,o_nz are:
2469 .vb
2470      proc0 : d_nz = 2, o_nz = 2
2471      proc1 : d_nz = 3, o_nz = 2
2472      proc2 : d_nz = 1, o_nz = 4
2473 .ve
2474    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2475    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2476    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2477    34 values.
2478 
2479    When d_nnz, o_nnz parameters are specified, the storage is specified
2480    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2481    In the above case the values for d_nnz,o_nnz are:
2482 .vb
2483      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2484      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2485      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2486 .ve
2487    Here the space allocated is the sum of all the above values, i.e. 34, and
2488    hence the preallocation is perfect.
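
   A call sketch for proc0 in the example above (hypothetical variable names):
.vb
      PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve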
2489 
2490    Level: intermediate
2491 
2492 .keywords: matrix, aij, compressed row, sparse, parallel
2493 
2494 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2495           MPIAIJ
2496 @*/
2497 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2498 {
2499   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2500 
2501   PetscFunctionBegin;
2502   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2503   if (f) {
2504     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2505   }
2506   PetscFunctionReturn(0);
2507 }
2508 
2509 #undef __FUNCT__
2510 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
2511 /*@C
2512      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
2513          rows in standard CSR format.
2514 
2515    Collective on MPI_Comm
2516 
2517    Input Parameters:
2518 +  comm - MPI communicator
2519 .  m - number of local rows (Cannot be PETSC_DECIDE)
2520 .  n - This value should be the same as the local size used in creating the
2521        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2522        calculated if N is given). For square matrices n is almost always m.
2523 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2524 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2525 .   i - row indices
2526 .   j - column indices
2527 -   a - matrix values
2528 
2529    Output Parameter:
2530 .   mat - the matrix
2531    Level: intermediate
2532 
2533    Notes:
2534        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2535      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2536      called this routine.
2537 
2538        The i and j indices are 0 based
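
   Example usage (a minimal sketch with made-up values; one local row, hypothetical comm and A):
.vb
      PetscInt    i[] = {0,2};
      PetscInt    j[] = {0,3};
      PetscScalar a[] = {1.0,2.0};
      ierr = MatCreateMPIAIJWithArrays(comm,1,PETSC_DECIDE,PETSC_DETERMINE,4,i,j,a,&A);CHKERRQ(ierr);
.ve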
2539 
2540 .keywords: matrix, aij, compressed row, sparse, parallel
2541 
2542 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2543           MPIAIJ, MatCreateMPIAIJ()
2544 @*/
2545 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2546 {
2547   PetscErrorCode ierr;
2548 
2549   PetscFunctionBegin;
2550   if (i[0]) {
2551     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2552   }
2553   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2554   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);  /* set the local/global sizes before preallocating */
2555   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
2556   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
2557   PetscFunctionReturn(0);
2557   PetscFunctionReturn(0);
2558 }
2559 
2560 #undef __FUNCT__
2561 #define __FUNCT__ "MatCreateMPIAIJ"
2562 /*@C
2563    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2564    (the default parallel PETSc format).  For good matrix assembly performance
2565    the user should preallocate the matrix storage by setting the parameters
2566    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2567    performance can be increased by more than a factor of 50.
2568 
2569    Collective on MPI_Comm
2570 
2571    Input Parameters:
2572 +  comm - MPI communicator
2573 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2574            This value should be the same as the local size used in creating the
2575            y vector for the matrix-vector product y = Ax.
2576 .  n - This value should be the same as the local size used in creating the
2577        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2578        calculated if N is given). For square matrices n is almost always m.
2579 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2580 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2581 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2582            (same value is used for all local rows)
2583 .  d_nnz - array containing the number of nonzeros in the various rows of the
2584            DIAGONAL portion of the local submatrix (possibly different for each row)
2585            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2586            The size of this array is equal to the number of local rows, i.e 'm'.
2587            You must leave room for the diagonal entry even if it is zero.
2588 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2589            submatrix (same value is used for all local rows).
2590 -  o_nnz - array containing the number of nonzeros in the various rows of the
2591            OFF-DIAGONAL portion of the local submatrix (possibly different for
2592            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2593            structure. The size of this array is equal to the number
2594            of local rows, i.e 'm'.
2595 
2596    Output Parameter:
2597 .  A - the matrix
2598 
2599    Notes:
2600    If the *_nnz parameter is given then the *_nz parameter is ignored
2601 
2602    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2603    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2604    storage requirements for this matrix.
2605 
2606    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2607    processor then it must be used on all processors that share the object for
2608    that argument.
2609 
2610    The user MUST specify either the local or global matrix dimensions
2611    (possibly both).
2612 
2613    The parallel matrix is partitioned such that the first m0 rows belong to
2614    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2615    to process 2 etc., where m0,m1,m2,... are the values of the input parameter 'm'.
2616 
2617    The DIAGONAL portion of the local submatrix of a processor can be defined
2618    as the submatrix which is obtained by extracting the part corresponding
2619    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2620    first row that belongs to the processor, and r2 is the last row belonging
2621    to this processor. This is a square mxm matrix. The remaining portion
2622    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2623 
2624    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2625 
2626    When calling this routine with a single process communicator, a matrix of
2627    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2628    type of communicator, use the construction mechanism:
2629      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
2630 
2631    By default, this format uses inodes (identical nodes) when possible.
2632    We search for consecutive rows with the same nonzero structure, thereby
2633    reusing matrix information to achieve increased efficiency.
2634 
2635    Options Database Keys:
2636 +  -mat_no_inode  - Do not use inodes
2637 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
2638 -  -mat_aij_oneindex - Internally use indexing starting at 1
2639         rather than 0.  Note that when calling MatSetValues(),
2640         the user still MUST index entries starting at 0!
2641 
2642 
2643    Example usage:
2644 
2645    Consider the following 8x8 matrix with 34 non-zero values, that is
2646    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2647    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2648    as follows:
2649 
2650 .vb
2651             1  2  0  |  0  3  0  |  0  4
2652     Proc0   0  5  6  |  7  0  0  |  8  0
2653             9  0 10  | 11  0  0  | 12  0
2654     -------------------------------------
2655            13  0 14  | 15 16 17  |  0  0
2656     Proc1   0 18  0  | 19 20 21  |  0  0
2657             0  0  0  | 22 23  0  | 24  0
2658     -------------------------------------
2659     Proc2  25 26 27  |  0  0 28  | 29  0
2660            30  0  0  | 31 32 33  |  0 34
2661 .ve
2662 
2663    This can be represented as a collection of submatrices as:
2664 
2665 .vb
2666       A B C
2667       D E F
2668       G H I
2669 .ve
2670 
2671    Where the submatrices A,B,C are owned by proc0, D,E,F are
2672    owned by proc1, G,H,I are owned by proc2.
2673 
2674    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2675    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2676    The 'M','N' parameters are 8,8, and have the same values on all procs.
2677 
2678    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2679    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2680    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2681    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2682    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
2683    matrix, and [DF] as another SeqAIJ matrix.
2684 
2685    When d_nz, o_nz parameters are specified, d_nz storage elements are
2686    allocated for every row of the local diagonal submatrix, and o_nz
2687    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2688    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2689    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2690    In this case, the values of d_nz,o_nz are:
2691 .vb
2692      proc0 : d_nz = 2, o_nz = 2
2693      proc1 : d_nz = 3, o_nz = 2
2694      proc2 : d_nz = 1, o_nz = 4
2695 .ve
2696    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2697    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2698    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2699    34 values.
2700 
2701    When d_nnz, o_nnz parameters are specified, the storage is specified
2702    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2703    In the above case the values for d_nnz,o_nnz are:
2704 .vb
2705      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2706      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2707      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2708 .ve
2709    Here the space allocated is the sum of all the above values, i.e. 34, and
2710    hence the preallocation is perfect.
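
   A call sketch for proc0 in the example above (hypothetical comm and A):
.vb
      PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
      ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve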
2711 
2712    Level: intermediate
2713 
2714 .keywords: matrix, aij, compressed row, sparse, parallel
2715 
2716 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2717           MPIAIJ, MatCreateMPIAIJWithArrays()
2718 @*/
2719 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2720 {
2721   PetscErrorCode ierr;
2722   PetscMPIInt    size;
2723 
2724   PetscFunctionBegin;
2725   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2726   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2727   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2728   if (size > 1) {
2729     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2730     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2731   } else {
2732     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2733     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2734   }
2735   PetscFunctionReturn(0);
2736 }
2737 
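/*
   MatMPIAIJGetSeqAIJ - gives access to the sequential diagonal block Ad, the sequential
   off-diagonal block Ao (stored with compressed column numbering), and colmap, the map from
   Ao's local column numbers to global column numbers.
*/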
2738 #undef __FUNCT__
2739 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2740 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2741 {
2742   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2743 
2744   PetscFunctionBegin;
2745   *Ad     = a->A;
2746   *Ao     = a->B;
2747   *colmap = a->garray;
2748   PetscFunctionReturn(0);
2749 }
2750 
2751 #undef __FUNCT__
2752 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2753 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2754 {
2755   PetscErrorCode ierr;
2756   PetscInt       i;
2757   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2758 
2759   PetscFunctionBegin;
2760   if (coloring->ctype == IS_COLORING_LOCAL) {
2761     ISColoringValue *allcolors,*colors;
2762     ISColoring      ocoloring;
2763 
2764     /* set coloring for diagonal portion */
2765     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2766 
2767     /* set coloring for off-diagonal portion */
2768     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2769     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2770     for (i=0; i<a->B->cmap.n; i++) {
2771       colors[i] = allcolors[a->garray[i]];
2772     }
2773     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2774     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
2775     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2776     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2777   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2778     ISColoringValue *colors;
2779     PetscInt        *larray;
2780     ISColoring      ocoloring;
2781 
2782     /* set coloring for diagonal portion */
2783     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2784     for (i=0; i<a->A->cmap.n; i++) {
2785       larray[i] = i + A->cmap.rstart;
2786     }
2787     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2788     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2789     for (i=0; i<a->A->cmap.n; i++) {
2790       colors[i] = coloring->colors[larray[i]];
2791     }
2792     ierr = PetscFree(larray);CHKERRQ(ierr);
2793     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
2794     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2795     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2796 
2797     /* set coloring for off-diagonal portion */
2798     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2799     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2800     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2801     for (i=0; i<a->B->cmap.n; i++) {
2802       colors[i] = coloring->colors[larray[i]];
2803     }
2804     ierr = PetscFree(larray);CHKERRQ(ierr);
2805     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
2806     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2807     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2808   } else {
2809     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2810   }
2811 
2812   PetscFunctionReturn(0);
2813 }
2814 
2815 #if defined(PETSC_HAVE_ADIC)
2816 #undef __FUNCT__
2817 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2818 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2819 {
2820   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2821   PetscErrorCode ierr;
2822 
2823   PetscFunctionBegin;
2824   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2825   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2826   PetscFunctionReturn(0);
2827 }
2828 #endif
2829 
2830 #undef __FUNCT__
2831 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2832 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2833 {
2834   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2835   PetscErrorCode ierr;
2836 
2837   PetscFunctionBegin;
2838   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2839   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2840   PetscFunctionReturn(0);
2841 }
2842 
2843 #undef __FUNCT__
2844 #define __FUNCT__ "MatMerge"
2845 /*@C
2846       MatMerge - Creates a single large PETSc matrix by concatenating the sequential
2847                  matrices from each processor
2848 
2849     Collective on MPI_Comm
2850 
2851    Input Parameters:
2852 +    comm - the communicator the parallel matrix will live on
2853 .    inmat - the input sequential matrix (one per process)
2854 .    n - number of local columns (or PETSC_DECIDE)
2855 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2856 
2857    Output Parameter:
2858 .    outmat - the parallel matrix generated
2859 
2860     Level: advanced
2861 
2862    Notes: The number of columns of the matrix on EACH processor MUST be the same.
               The input matrix inmat is destroyed by this routine.
2863 
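   A minimal usage sketch (illustrative only; error checking omitted). Each process
   builds a small SeqAIJ piece with the same number of columns, and MatMerge() stacks
   the pieces by rows:
.vb
     Mat seq,par;

     MatCreateSeqAIJ(PETSC_COMM_SELF,2,5,3,PETSC_NULL,&seq);
     (set entries of seq with MatSetValues() and assemble it)
     MatMerge(PETSC_COMM_WORLD,seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&par);
     MatDestroy(par);
.ve
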
2864 @*/
2865 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2866 {
2867   PetscErrorCode ierr;
2868   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
2869   PetscInt       *indx;
2870   PetscScalar    *values;
2871 
2872   PetscFunctionBegin;
2873   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2874   if (scall == MAT_INITIAL_MATRIX){
2875     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2876     if (n == PETSC_DECIDE){
2877       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
2878     }
2879     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2880     rstart -= m;
2881 
2882     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2883     for (i=0;i<m;i++) {
2884       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2885       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2886       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2887     }
2888     /* This routine will ONLY return an MPIAIJ type matrix */
2889     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
2890     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2891     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2892     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2893     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2894 
2895   } else if (scall == MAT_REUSE_MATRIX){
2896     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2897   } else {
2898     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2899   }
2900 
2901   for (i=0;i<m;i++) {
2902     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2903     Ii    = i + rstart;
2904     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2905     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2906   }
2907   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2908   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2909   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2910 
2911   PetscFunctionReturn(0);
2912 }
2913 
2914 #undef __FUNCT__
2915 #define __FUNCT__ "MatFileSplit"
2916 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2917 {
2918   PetscErrorCode    ierr;
2919   PetscMPIInt       rank;
2920   PetscInt          m,N,i,rstart,nnz;
2921   size_t            len;
2922   const PetscInt    *indx;
2923   PetscViewer       out;
2924   char              *name;
2925   Mat               B;
2926   const PetscScalar *values;
2927 
2928   PetscFunctionBegin;
2929   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2930   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2931   /* Should this be the type of the diagonal block of A? */
2932   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2933   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
2934   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2935   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2936   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2937   for (i=0;i<m;i++) {
2938     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2939     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2940     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2941   }
2942   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2943   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2944 
2945   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2946   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2947   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2948   sprintf(name,"%s.%d",outfile,rank);
2949   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
2950   ierr = PetscFree(name);CHKERRQ(ierr);
2951   ierr = MatView(B,out);CHKERRQ(ierr);
2952   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2953   ierr = MatDestroy(B);CHKERRQ(ierr);
2954   PetscFunctionReturn(0);
2955 }
2956 
2957 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2958 #undef __FUNCT__
2959 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2960 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2961 {
2962   PetscErrorCode       ierr;
2963   Mat_Merge_SeqsToMPI  *merge;
2964   PetscObjectContainer container;
2965 
2966   PetscFunctionBegin;
2967   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2968   if (container) {
2969     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2970     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2971     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2972     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2973     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2974     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2975     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2976     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2977     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
2978     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
2979     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
2980     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
2981 
2982     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2983     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2984     ierr = PetscFree(merge);CHKERRQ(ierr);
2985   }
2986 
2987   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2988   PetscFunctionReturn(0);
2989 }
2990 
2991 #include "src/mat/utils/freespace.h"
2992 #include "petscbt.h"
2993 static PetscEvent logkey_seqstompinum = 0;
2994 #undef __FUNCT__
2995 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
2996 /*@C
2997       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
2998                  matrices from each processor
2999 
3000     Collective on MPI_Comm
3001 
3002    Input Parameters:
3003 +    comm - the communicator the parallel matrix will live on
3004 .    seqmat - the input sequential matrix (one per process)
3005 .    m - number of local rows (or PETSC_DECIDE)
3006 .    n - number of local columns (or PETSC_DECIDE)
3007 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3008 
3009    Output Parameter:
3010 .    mpimat - the parallel matrix generated
3011 
3012     Level: advanced
3013 
3014    Notes:
3015      The dimensions of the sequential matrix on each processor MUST be the same.
3016      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
3017      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
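     A minimal usage sketch (illustrative only; seqmat is assumed to be an assembled
     SeqAIJ matrix whose nonzero pattern does not change between calls). The first
     call with MAT_INITIAL_MATRIX performs the symbolic and numeric phases and creates
     C; a later call with MAT_REUSE_MATRIX repeats only the numeric phase:
.vb
     Mat C;

     MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
     (change the numerical values of seqmat, keeping its nonzero pattern)
     MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);
     MatDestroy(C);
.ve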
3018 @*/
3019 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3020 {
3021   PetscErrorCode       ierr;
3022   MPI_Comm             comm=mpimat->comm;
3023   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3024   PetscMPIInt          size,rank,taga,*len_s;
3025   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3026   PetscInt             proc,m;
3027   PetscInt             **buf_ri,**buf_rj;
3028   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3029   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3030   MPI_Request          *s_waits,*r_waits;
3031   MPI_Status           *status;
3032   MatScalar            *aa=a->a,**abuf_r,*ba_i;
3033   Mat_Merge_SeqsToMPI  *merge;
3034   PetscObjectContainer container;
3035 
3036   PetscFunctionBegin;
3037   if (!logkey_seqstompinum) {
3038     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr);
3039   }
3040   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3041 
3042   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3043   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3044 
3045   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3046   if (!container) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix mpimat was not created by MatMerge_SeqsToMPISymbolic()");
3047   ierr   = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3049   bi     = merge->bi;
3050   bj     = merge->bj;
3051   buf_ri = merge->buf_ri;
3052   buf_rj = merge->buf_rj;
3053 
3054   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3055   owners = merge->rowmap.range;
3056   len_s  = merge->len_s;
3057 
3058   /* send and recv matrix values */
3059   /*-----------------------------*/
3060   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3061   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3062 
3063   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3064   for (proc=0,k=0; proc<size; proc++){
3065     if (!len_s[proc]) continue;
3066     i = owners[proc];
3067     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3068     k++;
3069   }
3070 
3071   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3072   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3073   ierr = PetscFree(status);CHKERRQ(ierr);
3074 
3075   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3076   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3077 
3078   /* insert mat values of mpimat */
3079   /*----------------------------*/
3080   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3081   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3082   nextrow = buf_ri_k + merge->nrecv;
3083   nextai  = nextrow + merge->nrecv;
3084 
3085   for (k=0; k<merge->nrecv; k++){
3086     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
3087     nrows = *(buf_ri_k[k]);
3088     nextrow[k]  = buf_ri_k[k]+1;  /* next row number in the k-th received i-structure */
3089     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the i-structure entries of the k-th received message */
3090   }
3091 
3092   /* set values of ba */
3093   m = merge->rowmap.n;
3094   for (i=0; i<m; i++) {
3095     arow = owners[rank] + i;
3096     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3097     bnzi = bi[i+1] - bi[i];
3098     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3099 
3100     /* add local non-zero vals of this proc's seqmat into ba */
3101     anzi = ai[arow+1] - ai[arow];
3102     aj   = a->j + ai[arow];
3103     aa   = a->a + ai[arow];
3104     nextaj = 0;
3105     for (j=0; nextaj<anzi; j++){
3106       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3107         ba_i[j] += aa[nextaj++];
3108       }
3109     }
3110 
3111     /* add received vals into ba */
3112     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3113       /* i-th row */
3114       if (i == *nextrow[k]) {
3115         anzi = *(nextai[k]+1) - *nextai[k];
3116         aj   = buf_rj[k] + *(nextai[k]);
3117         aa   = abuf_r[k] + *(nextai[k]);
3118         nextaj = 0;
3119         for (j=0; nextaj<anzi; j++){
3120           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3121             ba_i[j] += aa[nextaj++];
3122           }
3123         }
3124         nextrow[k]++; nextai[k]++;
3125       }
3126     }
3127     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3128   }
3129   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3130   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3131 
3132   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3133   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3134   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3135   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3136   PetscFunctionReturn(0);
3137 }
3138 
3139 static PetscEvent logkey_seqstompisym = 0;
3140 #undef __FUNCT__
3141 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3142 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3143 {
3144   PetscErrorCode       ierr;
3145   Mat                  B_mpi;
3146   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3147   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3148   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3149   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3150   PetscInt             len,proc,*dnz,*onz;
3151   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3152   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3153   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3154   MPI_Status           *status;
3155   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3156   PetscBT              lnkbt;
3157   Mat_Merge_SeqsToMPI  *merge;
3158   PetscObjectContainer container;
3159 
3160   PetscFunctionBegin;
3161   if (!logkey_seqstompisym) {
3162     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr);
3163   }
3164   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3165 
3166   /* make sure it is a PETSc comm */
3167   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3168   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3169   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3170 
3171   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3172   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3173 
3174   /* determine row ownership */
3175   /*---------------------------------------------------------*/
3176   merge->rowmap.n = m;
3177   merge->rowmap.N = M;
3178   merge->rowmap.bs = 1;
3179   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
3180   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3181   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3182 
3183   m      = merge->rowmap.n;
3184   M      = merge->rowmap.N;
3185   owners = merge->rowmap.range;
3186 
3187   /* determine the number of messages to send, their lengths */
3188   /*---------------------------------------------------------*/
3189   len_s  = merge->len_s;
3190 
3191   len = 0;  /* length of buf_si[] */
3192   merge->nsend = 0;
3193   for (proc=0; proc<size; proc++){
3194     len_si[proc] = 0;
3195     if (proc == rank){
3196       len_s[proc] = 0;
3197     } else {
3198       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3199       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3200     }
3201     if (len_s[proc]) {
3202       merge->nsend++;
3203       nrows = 0;
3204       for (i=owners[proc]; i<owners[proc+1]; i++){
3205         if (ai[i+1] > ai[i]) nrows++;
3206       }
3207       len_si[proc] = 2*(nrows+1);
3208       len += len_si[proc];
3209     }
3210   }
3211 
3212   /* determine the number and length of messages to receive for ij-structure */
3213   /*-------------------------------------------------------------------------*/
3214   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3215   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3216 
3217   /* post the Irecv of j-structure */
3218   /*-------------------------------*/
3219   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
3220   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3221 
3222   /* post the Isend of j-structure */
3223   /*--------------------------------*/
3224   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3225   sj_waits = si_waits + merge->nsend;
3226 
3227   for (proc=0, k=0; proc<size; proc++){
3228     if (!len_s[proc]) continue;
3229     i = owners[proc];
3230     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3231     k++;
3232   }
3233 
3234   /* receives and sends of j-structure are complete */
3235   /*------------------------------------------------*/
3236   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3237   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3238 
3239   /* send and recv i-structure */
3240   /*---------------------------*/
3241   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
3242   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3243 
3244   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3245   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3246   for (proc=0,k=0; proc<size; proc++){
3247     if (!len_s[proc]) continue;
3248     /* form outgoing message for i-structure:
3249          buf_si[0]:                 nrows to be sent
3250                [1:nrows]:           row index (local to the destination process)
3251                [nrows+1:2*nrows+1]: i-structure index
3252     */
3253     /*-------------------------------------------*/
3254     nrows = len_si[proc]/2 - 1;
3255     buf_si_i    = buf_si + nrows+1;
3256     buf_si[0]   = nrows;
3257     buf_si_i[0] = 0;
3258     nrows = 0;
3259     for (i=owners[proc]; i<owners[proc+1]; i++){
3260       anzi = ai[i+1] - ai[i];
3261       if (anzi) {
3262         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3263         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3264         nrows++;
3265       }
3266     }
3267     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3268     k++;
3269     buf_si += len_si[proc];
3270   }
3271 
3272   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3273   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3274 
3275   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3276   for (i=0; i<merge->nrecv; i++){
3277     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3278   }
3279 
3280   ierr = PetscFree(len_si);CHKERRQ(ierr);
3281   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3282   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3283   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3284   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3285   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3286   ierr = PetscFree(status);CHKERRQ(ierr);
3287 
3288   /* compute a local seq matrix in each processor */
3289   /*----------------------------------------------*/
3290   /* allocate bi array and free space for accumulating nonzero column info */
3291   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3292   bi[0] = 0;
3293 
3294   /* create and initialize a linked list */
3295   nlnk = N+1;
3296   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3297 
3298   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3300   len  = ai[owners[rank+1]] - ai[owners[rank]];
3301   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3302   current_space = free_space;
3303 
3304   /* determine symbolic info for each local row */
3305   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3306   nextrow = buf_ri_k + merge->nrecv;
3307   nextai  = nextrow + merge->nrecv;
3308   for (k=0; k<merge->nrecv; k++){
3309     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
3310     nrows = *buf_ri_k[k];
3311     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number in the k-th received i-structure */
3312     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the i-structure entries of the k-th received message */
3313   }
3314 
3315   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3316   len = 0;
3317   for (i=0;i<m;i++) {
3318     bnzi   = 0;
3319     /* add local non-zero cols of this proc's seqmat into lnk */
3320     arow   = owners[rank] + i;
3321     anzi   = ai[arow+1] - ai[arow];
3322     aj     = a->j + ai[arow];
3323     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3324     bnzi += nlnk;
3325     /* add received col data into lnk */
3326     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3327       if (i == *nextrow[k]) { /* i-th row */
3328         anzi = *(nextai[k]+1) - *nextai[k];
3329         aj   = buf_rj[k] + *nextai[k];
3330         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3331         bnzi += nlnk;
3332         nextrow[k]++; nextai[k]++;
3333       }
3334     }
3335     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3336 
3337     /* if free space is not available, make more free space */
3338     if (current_space->local_remaining<bnzi) {
3339       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3340       nspacedouble++;
3341     }
3342     /* copy data into free space, then initialize lnk */
3343     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3344     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3345 
3346     current_space->array           += bnzi;
3347     current_space->local_used      += bnzi;
3348     current_space->local_remaining -= bnzi;
3349 
3350     bi[i+1] = bi[i] + bnzi;
3351   }
3352 
3353   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3354 
3355   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3356   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3357   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3358 
3359   /* create symbolic parallel matrix B_mpi */
3360   /*---------------------------------------*/
3361   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3362   if (n==PETSC_DECIDE) {
3363     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3364   } else {
3365     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3366   }
3367   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3368   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3369   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3370 
3371   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3372   B_mpi->assembled     = PETSC_FALSE;
3373   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3374   merge->bi            = bi;
3375   merge->bj            = bj;
3376   merge->buf_ri        = buf_ri;
3377   merge->buf_rj        = buf_rj;
3378   merge->coi           = PETSC_NULL;
3379   merge->coj           = PETSC_NULL;
3380   merge->owners_co     = PETSC_NULL;
3381 
3382   /* attach the supporting struct to B_mpi for reuse */
3383   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3384   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3385   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3386   *mpimat = B_mpi;
3387 
3388   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3389   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3390   PetscFunctionReturn(0);
3391 }
3392 
3393 static PetscEvent logkey_seqstompi = 0;
3394 #undef __FUNCT__
3395 #define __FUNCT__ "MatMerge_SeqsToMPI"
3396 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3397 {
3398   PetscErrorCode   ierr;
3399 
3400   PetscFunctionBegin;
3401   if (!logkey_seqstompi) {
3402     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
3403   }
3404   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3405   if (scall == MAT_INITIAL_MATRIX){
3406     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3407   }
3408   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3409   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3410   PetscFunctionReturn(0);
3411 }
3412 static PetscEvent logkey_getlocalmat = 0;
3413 #undef __FUNCT__
3414 #define __FUNCT__ "MatGetLocalMat"
3415 /*@C
3416      MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of a parallel MPIAIJ matrix
3417 
3418     Not Collective
3419 
3420    Input Parameters:
3421 +    A - the matrix
3422 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3423 
3424    Output Parameter:
3425 .    A_loc - the local sequential matrix generated
3426 
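   Example usage (a sketch; A is an assembled parallel MPIAIJ matrix, error checking
   omitted; the MAT_REUSE_MATRIX call assumes the nonzero pattern of A has not changed):
.vb
     Mat A_loc;

     MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     (use A_loc, later update the values of A)
     MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
     MatDestroy(A_loc);
.ve
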
3427     Level: developer
3428 
3429 @*/
3430 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3431 {
3432   PetscErrorCode  ierr;
3433   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3434   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3435   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3436   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3437   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
3438   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3439 
3440   PetscFunctionBegin;
3441   if (!logkey_getlocalmat) {
3442     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
3443   }
3444   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3445   if (scall == MAT_INITIAL_MATRIX){
3446     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3447     ci[0] = 0;
3448     for (i=0; i<am; i++){
3449       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3450     }
3451     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3452     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3453     k = 0;
3454     for (i=0; i<am; i++) {
3455       ncols_o = bi[i+1] - bi[i];
3456       ncols_d = ai[i+1] - ai[i];
3457       /* off-diagonal portion of A */
3458       for (jo=0; jo<ncols_o; jo++) {
3459         col = cmap[*bj];
3460         if (col >= cstart) break;
3461         cj[k]   = col; bj++;
3462         ca[k++] = *ba++;
3463       }
3464       /* diagonal portion of A */
3465       for (j=0; j<ncols_d; j++) {
3466         cj[k]   = cstart + *aj++;
3467         ca[k++] = *aa++;
3468       }
3469       /* off-diagonal portion of A */
3470       for (j=jo; j<ncols_o; j++) {
3471         cj[k]   = cmap[*bj++];
3472         ca[k++] = *ba++;
3473       }
3474     }
3475     /* put together the new matrix */
3476     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3477     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3478     /* Since these are PETSc arrays, change flags to free them as necessary. */
3479     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
3480     mat->free_a  = PETSC_TRUE;
3481     mat->free_ij = PETSC_TRUE;
3482     mat->nonew   = 0;
3483   } else if (scall == MAT_REUSE_MATRIX){
3484     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3485     ci = mat->i; cj = mat->j; ca = mat->a;
3486     for (i=0; i<am; i++) {
3487       /* off-diagonal portion of A */
3488       ncols_o = bi[i+1] - bi[i];
3489       for (jo=0; jo<ncols_o; jo++) {
3490         col = cmap[*bj];
3491         if (col >= cstart) break;
3492         *ca++ = *ba++; bj++;
3493       }
3494       /* diagonal portion of A */
3495       ncols_d = ai[i+1] - ai[i];
3496       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3497       /* off-diagonal portion of A */
3498       for (j=jo; j<ncols_o; j++) {
3499         *ca++ = *ba++; bj++;
3500       }
3501     }
3502   } else {
3503     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3504   }
3505 
3506   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3507   PetscFunctionReturn(0);
3508 }
3509 
3510 static PetscEvent logkey_getlocalmatcondensed = 0;
3511 #undef __FUNCT__
3512 #define __FUNCT__ "MatGetLocalMatCondensed"
3513 /*@C
3514      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows and the NON-ZERO columns of a parallel MPIAIJ matrix
3515 
3516     Not Collective
3517 
3518    Input Parameters:
3519 +    A - the matrix
3520 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3521 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3522 
3523    Output Parameter:
3524 .    A_loc - the local sequential matrix generated
3525 
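   Example usage (a sketch; A is an assembled parallel MPIAIJ matrix; passing
   PETSC_NULL for row and col selects the default row and column sets described above):
.vb
     Mat A_loc;

     MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);
     MatDestroy(A_loc);
.ve
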
3526     Level: developer
3527 
3528 @*/
3529 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3530 {
3531   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3532   PetscErrorCode    ierr;
3533   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3534   IS                isrowa,iscola;
3535   Mat               *aloc;
3536 
3537   PetscFunctionBegin;
3538   if (!logkey_getlocalmatcondensed) {
3539     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
3540   }
3541   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3542   if (!row){
3543     start = A->rmap.rstart; end = A->rmap.rend;
3544     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3545   } else {
3546     isrowa = *row;
3547   }
3548   if (!col){
3549     start = A->cmap.rstart;
3550     cmap  = a->garray;
3551     nzA   = a->A->cmap.n;
3552     nzB   = a->B->cmap.n;
3553     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3554     ncols = 0;
3555     for (i=0; i<nzB; i++) {
3556       if (cmap[i] < start) idx[ncols++] = cmap[i];
3557       else break;
3558     }
3559     imark = i;
3560     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3561     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3562     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3563     ierr = PetscFree(idx);CHKERRQ(ierr);
3564   } else {
3565     iscola = *col;
3566   }
3567   if (scall != MAT_INITIAL_MATRIX){
3568     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3569     aloc[0] = *A_loc;
3570   }
3571   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3572   *A_loc = aloc[0];
3573   ierr = PetscFree(aloc);CHKERRQ(ierr);
3574   if (!row){
3575     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3576   }
3577   if (!col){
3578     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3579   }
3580   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3581   PetscFunctionReturn(0);
3582 }
3583 
3584 static PetscEvent logkey_GetBrowsOfAcols = 0;
3585 #undef __FUNCT__
3586 #define __FUNCT__ "MatGetBrowsOfAcols"
3587 /*@C
3588     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
3589 
3590     Collective on Mat
3591 
3592    Input Parameters:
3593 +    A,B - the matrices in mpiaij format
3594 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3595 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3596 
3597    Output Parameter:
3598 +    rowb, colb - index sets of rows and columns of B to extract
3599 .    brstart - row index in B_seq from which the next B->rmap.n rows are copies of B's local rows
3600 -    B_seq - the sequential matrix generated
3601 
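   Example usage (a sketch; A and B are assembled MPIAIJ matrices with compatible
   layouts, error checking omitted; the index sets returned by the MAT_INITIAL_MATRIX
   call must be kept and passed back for MAT_REUSE_MATRIX):
.vb
     IS       rowb = PETSC_NULL,colb = PETSC_NULL;
     PetscInt brstart;
     Mat      B_seq;

     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);
     (use B_seq, later update the values of B)
     MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);
     MatDestroy(B_seq);
     ISDestroy(rowb);
     ISDestroy(colb);
.ve
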
3602     Level: developer
3603 
3604 @*/
3605 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3606 {
3607   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3608   PetscErrorCode    ierr;
3609   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3610   IS                isrowb,iscolb;
3611   Mat               *bseq;
3612 
3613   PetscFunctionBegin;
3614   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
3615     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
3616   }
3617   if (!logkey_GetBrowsOfAcols) {
3618     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
3619   }
3620   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3621 
3622   if (scall == MAT_INITIAL_MATRIX){
3623     start = A->cmap.rstart;
3624     cmap  = a->garray;
3625     nzA   = a->A->cmap.n;
3626     nzB   = a->B->cmap.n;
3627     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3628     ncols = 0;
3629     for (i=0; i<nzB; i++) {  /* row < local row index */
3630       if (cmap[i] < start) idx[ncols++] = cmap[i];
3631       else break;
3632     }
3633     imark = i;
3634     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3635     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3636     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3637     ierr = PetscFree(idx);CHKERRQ(ierr);
3638     *brstart = imark;
3639     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
3640   } else {
3641     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3642     isrowb = *rowb; iscolb = *colb;
3643     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3644     bseq[0] = *B_seq;
3645   }
3646   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3647   *B_seq = bseq[0];
3648   ierr = PetscFree(bseq);CHKERRQ(ierr);
3649   if (!rowb){
3650     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3651   } else {
3652     *rowb = isrowb;
3653   }
3654   if (!colb){
3655     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3656   } else {
3657     *colb = iscolb;
3658   }
3659   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3660   PetscFunctionReturn(0);
3661 }
3662 
3663 static PetscEvent logkey_GetBrowsOfAocols = 0;
3664 #undef __FUNCT__
3665 #define __FUNCT__ "MatGetBrowsOfAoCols"
3666 /*@C
3667     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
3668     of the OFF-DIAGONAL portion of local A
3669 
3670     Collective on Mat
3671 
3672    Input Parameters:
3673 +    A,B - the matrices in mpiaij format
3674 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3675 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3676 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3677 
3678    Output Parameter:
3679 +    B_oth - the sequential matrix generated
3680 
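   Example usage (a sketch; A and B are assembled MPIAIJ matrices with compatible
   layouts, error checking omitted; startsj and bufa are filled by the first call and
   reused by later MAT_REUSE_MATRIX calls):
.vb
     PetscInt    *startsj = PETSC_NULL;
     PetscScalar *bufa    = PETSC_NULL;
     Mat         B_oth;

     MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);
     (use B_oth, later update the values of B keeping its nonzero pattern)
     MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);
     MatDestroy(B_oth);
     PetscFree(startsj);
     PetscFree(bufa);
.ve
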
3681     Level: developer
3682 
3683 @*/
3684 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3685 {
3686   VecScatter_MPI_General *gen_to,*gen_from;
3687   PetscErrorCode         ierr;
3688   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
3689   Mat_SeqAIJ             *b_oth;
3690   VecScatter             ctx=a->Mvctx;
3691   MPI_Comm               comm=ctx->comm;
3692   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3693   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
3694   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3695   PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3696   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
3697   MPI_Status             *sstatus,rstatus;
3698   PetscInt               *cols;
3699   PetscScalar            *vals;
3700   PetscMPIInt            j;
3701 
3702   PetscFunctionBegin;
3703   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
3704     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
3705   }
3706   if (!logkey_GetBrowsOfAocols) {
3707     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
3708   }
3709   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3710   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3711 
3712   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3713   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3714   rvalues  = gen_from->values; /* used below to hold the lengths of the received rows */
3715   svalues  = gen_to->values;   /* used below to hold the lengths of the rows to be sent */
3716   nrecvs   = gen_from->n;
3717   nsends   = gen_to->n;
3718 
3719   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
3720   srow     = gen_to->indices;   /* local row index to be sent */
3721   rstarts  = gen_from->starts;
3722   sstarts  = gen_to->starts;
3723   rprocs   = gen_from->procs;
3724   sprocs   = gen_to->procs;
3725   sstatus  = gen_to->sstatus;
3726 
3727   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3728   if (scall == MAT_INITIAL_MATRIX){
3729     /* i-array */
3730     /*---------*/
3731     /*  post receives */
3732     for (i=0; i<nrecvs; i++){
3733       rowlen = (PetscInt*)rvalues + rstarts[i];
3734       nrows = rstarts[i+1]-rstarts[i];
3735       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3736     }
3737 
3738     /* pack the outgoing message */
3739     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3740     rstartsj = sstartsj + nsends +1;
3741     sstartsj[0] = 0;  rstartsj[0] = 0;
3742     len = 0; /* total length of j or a array to be sent */
3743     k = 0;
3744     for (i=0; i<nsends; i++){
3745       rowlen = (PetscInt*)svalues + sstarts[i];
3746       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3747       for (j=0; j<nrows; j++) {
3748         row = srow[k] + B->rmap.range[rank]; /* global row idx */
3749         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3750         len += rowlen[j];
3751         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3752         k++;
3753       }
3754       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3755        sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3756     }
3757     /* recvs and sends of i-array are completed */
3758     i = nrecvs;
3759     while (i--) {
3760       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3761     }
3762     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3763     /* allocate buffers for sending j and a arrays */
3764     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3765     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3766 
3767     /* create i-array of B_oth */
3768     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3769     b_othi[0] = 0;
3770     len = 0; /* total length of j or a array to be received */
3771     k = 0;
3772     for (i=0; i<nrecvs; i++){
3773       rowlen = (PetscInt*)rvalues + rstarts[i];
3774       nrows = rstarts[i+1]-rstarts[i];
3775       for (j=0; j<nrows; j++) {
3776         b_othi[k+1] = b_othi[k] + rowlen[j];
3777         len += rowlen[j]; k++;
3778       }
3779       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3780     }
3781 
3782     /* allocate space for the j and a arrays of B_oth */
3783     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3784     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3785 
3786     /* j-array */
3787     /*---------*/
3788     /*  post receives of j-array */
3789     for (i=0; i<nrecvs; i++){
3790       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3791       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3792     }
3793     k = 0;
3794     for (i=0; i<nsends; i++){
3795       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3796       bufJ = bufj+sstartsj[i];
3797       for (j=0; j<nrows; j++) {
3798         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
3799         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3800         for (l=0; l<ncols; l++){
3801           *bufJ++ = cols[l];
3802         }
3803         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3804       }
3805       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3806     }
3807 
3808     /* recvs and sends of j-array are completed */
3809     i = nrecvs;
3810     while (i--) {
3811       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3812     }
3813     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3814   } else if (scall == MAT_REUSE_MATRIX){
3815     sstartsj = *startsj;
3816     rstartsj = sstartsj + nsends +1;
3817     bufa     = *bufa_ptr;
3818     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3819     b_otha   = b_oth->a;
3820   } else {
3821     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3822   }
3823 
3824   /* a-array */
3825   /*---------*/
3826   /*  post receives of a-array */
3827   for (i=0; i<nrecvs; i++){
3828     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3829     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3830   }
3831   k = 0;
3832   for (i=0; i<nsends; i++){
3833     nrows = sstarts[i+1]-sstarts[i];
3834     bufA = bufa+sstartsj[i];
3835     for (j=0; j<nrows; j++) {
3836       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
3837       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3838       for (l=0; l<ncols; l++){
3839         *bufA++ = vals[l];
3840       }
3841       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3842 
3843     }
3844     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3845   }
3846   /* recvs and sends of a-array are completed */
3847   i = nrecvs;
3848   while (i--) {
3849     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3850   }
3851   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3852   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
3853 
3854   if (scall == MAT_INITIAL_MATRIX){
3855     /* put together the new matrix */
3856     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3857 
3858     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3859     /* Since these are PETSc arrays, change flags to free them as necessary. */
3860     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
3861     b_oth->free_a  = PETSC_TRUE;
3862     b_oth->free_ij = PETSC_TRUE;
3863     b_oth->nonew   = 0;
3864 
3865     ierr = PetscFree(bufj);CHKERRQ(ierr);
3866     if (!startsj || !bufa_ptr){
3867       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3868       ierr = PetscFree(bufa);CHKERRQ(ierr);
3869     } else {
3870       *startsj  = sstartsj;
3871       *bufa_ptr = bufa;
3872     }
3873   }
3874   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3875 
3876   PetscFunctionReturn(0);
3877 }
3878 
3879 #undef __FUNCT__
3880 #define __FUNCT__ "MatGetCommunicationStructs"
3881 /*@C
3882   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
3883 
3884   Not Collective
3885 
3886   Input Parameters:
3887 . A - The matrix in mpiaij format
3888 
3889   Output Parameter:
3890 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
3891 . colmap - A map from global column index to local index into lvec
3892 - multScatter - A scatter from the argument of a matrix-vector product to lvec
3893 
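  Example usage (a sketch; assumes a build without PETSC_USE_CTABLE, in which case
  colmap is an integer array; the returned objects belong to the matrix and must not
  be destroyed by the caller):
.vb
     Vec        lvec;
     PetscInt   *colmap;
     VecScatter mvctx;

     MatGetCommunicationStructs(A,&lvec,&colmap,&mvctx);
.ve
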
3894   Level: developer
3895 
3896 @*/
3897 #if defined (PETSC_USE_CTABLE)
3898 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
3899 #else
3900 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
3901 #endif
3902 {
3903   Mat_MPIAIJ *a;
3904 
3905   PetscFunctionBegin;
3906   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
3907   PetscValidPointer(lvec, 2);
3908   PetscValidPointer(colmap, 3);
3909   PetscValidPointer(multScatter, 4);
3910   a = (Mat_MPIAIJ *) A->data;
3911   if (lvec) *lvec = a->lvec;
3912   if (colmap) *colmap = a->colmap;
3913   if (multScatter) *multScatter = a->Mvctx;
3914   PetscFunctionReturn(0);
3915 }
3916 
3917 /*MC
3918    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3919 
3920    Options Database Keys:
3921 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3922 
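   For example, the type can be selected at run time (a sketch; m, n, M, N are
   assumed to be the desired local and global sizes):
.vb
     Mat A;

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetFromOptions(A);
.ve
   followed by the run-time option -mat_type mpiaij, or set the type explicitly with
   MatSetType(A,MATMPIAIJ).
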
3923   Level: beginner
3924 
3925 .seealso: MatCreateMPIAIJ
3926 M*/
3927 
3928 EXTERN_C_BEGIN
3929 #undef __FUNCT__
3930 #define __FUNCT__ "MatCreate_MPIAIJ"
3931 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
3932 {
3933   Mat_MPIAIJ     *b;
3934   PetscErrorCode ierr;
3935   PetscMPIInt    size;
3936 
3937   PetscFunctionBegin;
3938   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3939 
3940   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3941   B->data         = (void*)b;
3942   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3943   B->factor       = 0;
3944   B->rmap.bs      = 1;
3945   B->assembled    = PETSC_FALSE;
3946   B->mapping      = 0;
3947 
3948   B->insertmode      = NOT_SET_VALUES;
3949   b->size            = size;
3950   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3951 
3952   /* build cache for off array entries formed */
3953   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3954   b->donotstash  = PETSC_FALSE;
3955   b->colmap      = 0;
3956   b->garray      = 0;
3957   b->roworiented = PETSC_TRUE;
3958 
3959   /* stuff used for matrix vector multiply */
3960   b->lvec      = PETSC_NULL;
3961   b->Mvctx     = PETSC_NULL;
3962 
3963   /* stuff for MatGetRow() */
3964   b->rowindices   = 0;
3965   b->rowvalues    = 0;
3966   b->getrowactive = PETSC_FALSE;
3967 
3968 
3969   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3970                                      "MatStoreValues_MPIAIJ",
3971                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3972   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3973                                      "MatRetrieveValues_MPIAIJ",
3974                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3975   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3976 				     "MatGetDiagonalBlock_MPIAIJ",
3977                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3978   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3979 				     "MatIsTranspose_MPIAIJ",
3980 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3981   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3982 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3983 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3984   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3985 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3986 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
3987   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3988 				     "MatDiagonalScaleLocal_MPIAIJ",
3989 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
3990   PetscFunctionReturn(0);
3991 }
3992 EXTERN_C_END
3993 
3994 /*
3995     Special version for direct calls from Fortran
3996 */
3997 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3998 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
3999 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
4000 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
4001 #endif
4002 
4003 /* Change these macros so they can be used in a void function */
4004 #undef CHKERRQ
4005 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
4006 #undef SETERRQ2
4007 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
4008 #undef SETERRQ
4009 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)
4010 
4011 EXTERN_C_BEGIN
4012 #undef __FUNCT__
4013 #define __FUNCT__ "matsetvaluesmpiaij_"
4014 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
4015 {
4016   Mat            mat = *mmat;
4017   PetscInt       m = *mm, n = *mn;
4018   InsertMode     addv = *maddv;
4019   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
4020   PetscScalar    value;
4021   PetscErrorCode ierr;
4022 
4023   MatPreallocated(mat);
4024   if (mat->insertmode == NOT_SET_VALUES) {
4025     mat->insertmode = addv;
4026   }
4027 #if defined(PETSC_USE_DEBUG)
4028   else if (mat->insertmode != addv) {
4029     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4030   }
4031 #endif
4032   {
4033   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
4034   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
4035   PetscTruth     roworiented = aij->roworiented;
4036 
4037   /* Some Variables required in the macro */
4038   Mat            A = aij->A;
4039   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
4040   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
4041   PetscScalar    *aa = a->a;
4042   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
4043   Mat            B = aij->B;
4044   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
4045   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
4046   PetscScalar    *ba = b->a;
4047 
4048   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4049   PetscInt       nonew = a->nonew;
4050   PetscScalar    *ap1,*ap2;
4051 
4052   PetscFunctionBegin;
4053   for (i=0; i<m; i++) {
4054     if (im[i] < 0) continue;
4055 #if defined(PETSC_USE_DEBUG)
4056     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
4057 #endif
4058     if (im[i] >= rstart && im[i] < rend) {
4059       row      = im[i] - rstart;
4060       lastcol1 = -1;
4061       rp1      = aj + ai[row];
4062       ap1      = aa + ai[row];
4063       rmax1    = aimax[row];
4064       nrow1    = ailen[row];
4065       low1     = 0;
4066       high1    = nrow1;
4067       lastcol2 = -1;
4068       rp2      = bj + bi[row];
4069       ap2      = ba + bi[row];
4070       rmax2    = bimax[row];
4071       nrow2    = bilen[row];
4072       low2     = 0;
4073       high2    = nrow2;
4074 
4075       for (j=0; j<n; j++) {
4076         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4077         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4078         if (in[j] >= cstart && in[j] < cend){
4079           col = in[j] - cstart;
4080           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4081         } else if (in[j] < 0) continue;
4082 #if defined(PETSC_USE_DEBUG)
4083         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
4084 #endif
4085         else {
4086           if (mat->was_assembled) {
4087             if (!aij->colmap) {
4088               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
4089             }
4090 #if defined (PETSC_USE_CTABLE)
4091             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4092 	    col--;
4093 #else
4094             col = aij->colmap[in[j]] - 1;
4095 #endif
4096             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4097               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
4098               col =  in[j];
4099               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4100               B = aij->B;
4101               b = (Mat_SeqAIJ*)B->data;
4102               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4103               rp2      = bj + bi[row];
4104               ap2      = ba + bi[row];
4105               rmax2    = bimax[row];
4106               nrow2    = bilen[row];
4107               low2     = 0;
4108               high2    = nrow2;
4109               bm       = aij->B->rmap.n;
4110               ba = b->a;
4111             }
4112           } else col = in[j];
4113           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4114         }
4115       }
4116     } else {
4117       if (!aij->donotstash) {
4118         if (roworiented) {
4119           if (ignorezeroentries && v[i*n] == 0.0) continue;
4120           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
4121         } else {
4122           if (ignorezeroentries && v[i] == 0.0) continue;
4123           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4124         }
4125       }
4126     }
4127   }}
4128   PetscFunctionReturnVoid();
4129 }
4130 EXTERN_C_END
4131